Web_Scraping Project

MagicBricks_website Data_Collection

In [209]:
# Import Libraries
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import re
In [149]:
# Code for the collection of the data
# here list of 4 cities are taken and collection of data is done for them
cities = ['bangalore','mumbai','chennai','visakhapatnam']
price=[]
per_sqt=[]
BHK=[]
Carpet_Area=[]
Owner=[]
Floor=[]
Area=[]
City=[]
for j in cities:
    url = "https://www.magicbricks.com/flats-in-"+j+"-for-sale-pppfs"
    print(url)
    page = requests.get(url)
    soup = BeautifulSoup(page.text)
    
    for i in soup.find_all("div",class_="mb-srp__card__price--amount"):
        price.append(i.text)
        
    for i in soup.find_all("div",class_="mb-srp__card__price--size"):
        per_sqt.append(i.text.split(' ')[0])
        
    for i in soup.find_all("h2",class_="mb-srp__card--title"):
        BHK.append(i.text[0])
        
    for i in soup.find_all("div",class_="mb-srp__card__summary--value"):
        a =i.text
        b = re.findall(r"(\d+)\ssqft",str(a))
        if b:
            Carpet_Area.append(b[0])
            
    for i in soup.find_all('div',class_='mb-srp__card__ads--name'):
        Owner.append(i.text.split(':')[1])
            
    for i in soup.find_all("div",class_="mb-srp__card__summary__list--item"):
        a = i.text
        b = re.findall(r"\w+\s\w+\s\w+\s\d+",a)
        if b:
            Floor.append(b[0])
            
    for i in soup.find_all("h2",class_="mb-srp__card--title"):
        Area.append(i.text.split('in')[1].split(',')[0])
        City.append(j)
    print(i,'---->', len(Owner))
    print(i,'---->', len(Floor))
    print(i,'---->', len(per_sqt))
    
https://www.magicbricks.com/flats-in-bangalore-for-sale-pppfs
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in Yelahanka , Bangalore</h2> ----> 30
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in Yelahanka , Bangalore</h2> ----> 30
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in Yelahanka , Bangalore</h2> ----> 29
https://www.magicbricks.com/flats-in-mumbai-for-sale-pppfs
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in Mulund West , Mumbai</h2> ----> 60
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in Mulund West , Mumbai</h2> ----> 58
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in Mulund West , Mumbai</h2> ----> 52
https://www.magicbricks.com/flats-in-chennai-for-sale-pppfs
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in Chennai</h2> ----> 90
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in Chennai</h2> ----> 88
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in Chennai</h2> ----> 81
https://www.magicbricks.com/flats-in-visakhapatnam-for-sale-pppfs
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in MVP Colony , Visakhapatnam</h2> ----> 120
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in MVP Colony , Visakhapatnam</h2> ----> 118
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in MVP Colony , Visakhapatnam</h2> ----> 110
In [159]:
# printing to see the values are obtained are not
print(price)
print(per_sqt)
print(BHK)
print(Carpet_Area)
print(Owner)
print(Floor)
print(Area)
print(City)

# Finding the length to see is all the lengths are same
print(len(price))
print(len(per_sqt))
print(len(BHK))
print(len(Carpet_Area))
print(len(Owner))
print(len(Floor))
print(len(Area))
print(len(City))
['₹60 Lac ', '₹80 Lac ', '₹48 Lac ', '₹1.35 Cr ', '₹61 Lac ', '₹1.30 Cr ', '₹50 Lac ', '₹1.80 Cr ', '₹1 Cr ', '₹1.02 Cr ', '₹1.45 Cr ', '₹3.85 Cr ', '₹70 Lac ', '₹85 Lac ', '₹98 Lac ', '₹60 Lac ', '₹78 Lac ', '₹57 Lac ', '₹1.08 Cr ', '₹57 Lac ', '₹35 Lac ', '₹1.80 Cr ', '₹2.75 Cr ', '₹1.12 Cr ', '₹1 Cr ', '₹1.45 Cr ', '₹80 Lac ', '₹1.35 Cr ', '₹2.10 Cr ', '₹51.2 Lac ', '₹73 Lac ', '₹22 Lac ', '₹2.25 Cr ', '₹2.70 Cr ', '₹41 Lac ', '₹5.99 Cr ', '₹1.20 Cr ', '₹1.30 Cr ', '₹65 Lac ', '₹3.75 Cr ', '₹2.70 Cr ', '₹1.02 Cr ', '₹95 Lac ', '₹60 Lac ', '₹15 Cr ', '₹32 Lac ', '₹98 Lac ', '₹4.89 Cr ', '₹44.9 Lac ', '₹70 Lac ', '₹1.05 Cr ', '₹30 Lac ', '₹1.25 Cr ', '₹1.90 Cr ', '₹1.55 Cr ', '₹1.28 Cr ', '₹2 Cr ', '₹2.52 Cr ', '₹1 Cr ', '₹1.60 Cr ', '₹55 Lac ', '₹80 Lac ', '₹60 Lac ', '₹60 Lac ', '₹85 Lac ', '₹49 Lac ', '₹55 Lac ', '₹53 Lac ', '₹69 Lac ', '₹1.25 Cr ', '₹87 Lac ', '₹32 Lac ', '₹71 Lac ', '₹85 Lac ', '₹47 Lac ', '₹46 Lac ', '₹43 Lac ', '₹1.15 Cr ', '₹27 Lac ', '₹29.7 Lac ', '₹59 Lac ', '₹45 Lac ', '₹35 Lac ', '₹74 Lac ', '₹39 Lac ', '₹46 Lac ', '₹1.30 Cr ', '₹40 Lac ', '₹70 Lac ', '₹74.9 Lac ', '₹47.5 Lac ', '₹72 Lac ', '₹97 Lac ', '₹63 Lac ', '₹35 Lac ', '₹63.6 Lac ', '₹47 Lac ', '₹1 Cr ', '₹38 Lac ', '₹30 Lac ', '₹97.5 Lac ', '₹30 Lac ', '₹30 Lac ', '₹55 Lac ', '₹60 Lac ', '₹73 Lac ', '₹45 Lac ', '₹89 Lac ', '₹70 Lac ', '₹49 Lac ', '₹43.6 Lac ', '₹56 Lac ', '₹97 Lac ', '₹47 Lac ', '₹78 Lac ', '₹38.9 Lac ', '₹26 Lac ', '₹42 Lac ', '₹38 Lac ', '₹78 Lac ']
['₹4950', '₹6957', '₹4528', '₹7542', '₹5706', '₹7303', '₹5000', '₹11335', '₹8889', '₹6997', '₹8146', '₹20632', '₹5655', '₹6071', '₹9800', '₹6367', '₹5158', '₹6374', '₹4553', '₹4545', '₹7500', '₹12500', '₹7705', '₹5305', '₹7436', '₹5333', '₹7418', '₹10194', '₹4650', '₹26471', '₹23276', '₹16667', nan, '₹23636', '₹37500', '₹27136', '₹18545', '₹60000', '₹5047', nan, '₹11529', '₹38459', '₹7242', nan, '₹10000', '₹14583', '₹5376', '₹12042', '₹13669', '₹19375', '₹21333', '₹33333', '₹22951', '₹10695', '₹25316', '₹5268', '₹8658', '₹5000', '₹4444', '₹8947', nan, '₹4895', '₹6111', '₹5268', '₹5712', '₹8367', '₹8406', nan, '₹5086', '₹9140', '₹6112', '₹7931', '₹4335', '₹11616', '₹4060', '₹3999', '₹7584', '₹5294', '₹6481', '₹6198', '₹4343', '₹6571', '₹11818', '₹5249', '₹6763', '₹8000', '₹3242', '₹5143', '₹4615', '₹4000', '₹4667', '₹4130', '₹4087', '₹7634', '₹3619', '₹3046', '₹7500', '₹5000', '₹3000', '₹3667', '₹3750', '₹4056', '₹3309', '₹5235', '₹4118', '₹3250', '₹6054', '₹5173', '₹4215', '₹4756', '₹3700', '₹2600', '₹4667', '₹4222', '₹7091', nan, nan, nan, nan, nan]
['3', '2', '2', '3', '2', '3', '2', '3', '3', '2', '3', '3', '3', '2', '2', '2', '2', '2', '3', '3', '2', '4', '3', '3', '3', '3', '3', '3', '3', '2', '1', '1', '2', '3', '1', '3', '2', '1', '1', '2', '2', '1', '1', '2', '4', '2', '2', '3', '1', '2', '1', '1', '2', '3', '2', '2', '1', '2', '2', '2', '2', '2', '3', '2', '2', '2', '3', '2', '3', '2', '2', '1', '2', '2', '2', '1', '2', '2', '1', '2', '2', '2', '1', '3', '2', '2', '2', '2', '2', '2', '3', '2', '3', '3', '2', '3', '2', '3', '2', '2', '3', '1', '2', '3', '3', '3', '3', '3', '3', '2', '2', '2', '3', '2', '3', '3', '2', '2', '2', '2']
['1000', '1150', '1060', '1790', '803', '1640', '1250', '1588', '1000', '1465', '1500', '1418', '1350', '1000', '1250', '857', '753', '1186', '1250', '770', '2150', '1900', '1460', '1885', '1950', '1250', '1820', '2060', '740', '331', '310', '610', '1160', '665', '1052', '685', '400', '332', '776', '800', '450', '430', '675', '1870', '820', '850', '1172', '404', '575', '507', '558', '680', '1130', '562', '535', '590', '792', '935', '632', '1044', '800', '1200', '1350', '950', '1001', '900', '800', '852', '1494', '700', '634', '1395', '930', '464', '580', '509', '990', '445', '469', '778', '850', '540', '1194', '898', '700', '600', '762', '1035', '800', '1300', '823', '1700', '1575', '700', '1540', '1150', '1310', '1000', '915', '1300', '600', '1000', '1300', '1516', '1800', '1100', '1620', '1600', '970', '1133', '925', '1875', '1050', '1640', '840', '800', '900', '650', '800', nan]
[' Omkar Pandey', ' gokul', ' Thayumanavan', ' Prasanna', ' Akash Akash', ' av nath', ' Sangeeta Pillai', ' ganga k', ' Anjan K', ' Omesh Saraf', ' Mohd Hussain ', ' Rahul Jain', ' Gaurav Kumar GAURAV', ' Ancy', ' Anoop', ' geetha', ' Omkar Omkar', ' ruchika', ' jeswanth', ' vikas saxena', ' Ranju', ' tezal', ' shravan', ' Bhav', ' Nagananda', ' MD Fuzail', ' ashwanth', ' Partha Sarma', ' Shekar', ' Sashank Constructions', ' K SH', ' NIYATI', ' Sachin Kunder', ' Motashaw Motashaw', ' MONCY BHASKAR', ' Anam khan', ' Milind Desai', ' Jas Baljeet J', ' Gangaram Dhuri', ' Satyajit Satyajit', ' vasant ahir', ' Rajendra', ' Aditi Shah', ' Haroon mansuri', ' kishor kishor', ' Medha Naik', ' Anita', ' jagdish Dassani', ' Raghav Sharma', ' nandan lanjekar', ' amit chalke', ' Arun Iyer', ' Ritu', ' Khuzema Tajir', ' Dipti solanki', ' sujatha', ' Aysha Khan', ' DHRUV SHARMA', ' Mumtaz Nazim', ' Savith Raghavan Savith Raghavan', ' Yogesh Baskaran', ' K VINODH KUMAR', ' GURURAJAN BABU', ' Vijay Jaya', ' siva', ' Prasanna', ' M DINESH KANNA', ' Elango Rajendran', ' Dev', ' vinothan', ' Owner', ' M G S Jayamalathi', ' Mahalingam', ' Kads', ' YOGEESWARAN', ' Kuppuswamy Bj', ' Jayalakshmi', ' Vidhya vidhya', ' Sujith', ' sakthi', ' Santhosh', ' Santhosh Developers', ' R Premalatha', ' Jebaselvan Navaraj', ' Ramkumar Gopalsamy', ' ram', ' Faizal', ' Vimal Kumar S', ' Venugopal', ' PURUSHOTHAMAN', ' Hari', ' Sl Prasanna', ' Sonia', ' Ramana Murthy', ' s bhargavi', ' Lakshman', ' Devi', ' jitendra sai', ' Ram', ' vakada ramana', ' Ravindra Pamidi', ' Praveen', ' Sai Stylish', ' MADHU', ' Uma Chavali', ' TEJARAM THRI', ' Jagan', ' Rakesh Power', ' Maheshwari', ' RAMACHANDRA RAO', ' Jaswanth', ' Srikanth', ' Akshay Kumar', ' swamy m', ' lalitha dhulipala', ' Vision Properties', ' Honey Group', ' New Living Properties', ' podilapu simhachalam', ' Dinakar']
['FloorGround out of 4', 'FloorGround out of 4', 'Floor4 out of 4', 'Floor14 out of 15', 'Floor2 out of 4', 'Floor1 out of 4', 'Floor3 out of 5', 'Basement out of 17', 'Floor3 out of 14', 'Floor2 out of 10', 'FloorGround out of 7', 'Floor16 out of 16', 'FloorGround out of 5', 'Floor1 out of 7', 'Floor2 out of 4', 'Floor2 out of 5', 'Floor3 out of 6', 'Floor1 out of 7', 'Floor1 out of 4', 'Floor6 out of 12', 'Floor9 out of 9', 'Floor1 out of 4', 'Floor2 out of 4', 'Floor5 out of 5', 'Floor2 out of 4', 'Floor1 out of 3', 'Floor13 out of 27', 'Floor5 out of 13', 'Floor3 out of 3', 'FloorGround out of 4', 'Floor8 out of 9', 'Floor1 out of 5', 'Floor5 out of 15', 'Floor4 out of 7', 'FloorGround out of 76', 'Floor4 out of 4', 'FloorGround out of 3', 'Floor1 out of 6', 'Floor5 out of 20', 'Floor6 out of 12', 'Floor7 out of 7', 'FloorGround out of 4', 'Floor1 out of 3', 'Floor21 out of 21', 'Floor3 out of 3', 'Floor7 out of 21', 'Floor5 out of 7', 'Floor5 out of 7', 'Floor4 out of 15', 'Floor9 out of 15', 'Floor17 out of 18', 'Floor5 out of 12', 'Floor7 out of 7', 'Floor15 out of 23', 'Floor3 out of 4', 'Floor2 out of 13', 'Floor2 out of 4', 'Floor1 out of 11', 'Floor1 out of 4', 'Floor1 out of 2', 'FloorGround out of 2', 'Floor2 out of 2', 'Floor1 out of 2', 'Floor1 out of 2', 'Floor1 out of 2', 'Floor4 out of 4', 'Floor3 out of 5', 'Floor16 out of 16', 'Floor3 out of 4', 'Floor1 out of 2', 'Floor4 out of 4', 'Floor3 out of 4', 'Floor1 out of 2', 'Floor2 out of 4', 'Floor2 out of 2', 'Floor2 out of 3', 'Floor2 out of 3', 'Floor1 out of 4', 'Floor1 out of 3', 'Floor1 out of 3', 'Floor1 out of 1', 'FloorGround out of 2', 'Floor1 out of 2', 'Floor1 out of 1', 'Floor1 out of 2', 'Floor1 out of 1', 'Floor1 out of 4', 'Floor1 out of 4', 'Floor2 out of 5', 'Floor10 out of 16', 'Floor15 out of 16', 'FloorGround out of 6', 'Floor2 out of 3', 'Floor3 out of 5', 'Floor4 out of 4', 'Floor1 out of 5', 'Floor2 out of 4', 'Floor2 out of 5', 'Floor2 out of 5', 'Basement out of 3', 'Floor2 out of 5', 'Floor3 out of 5', 'Floor1 out of 5', 'Floor2 out of 5', 'Floor1 out of 5', 'Floor5 out of 5', 'Floor1 out of 5', 'Floor2 out of 4', 'Floor2 out of 10', 'Floor4 out of 5', 'Floor2 out of 3', 'Floor1 out of 5', 'Floor4 out of 5', 'Floor2 out of 5', 'Floor1 out of 5', 'Floor3 out of 5', 'Floor5 out of 5', 'Floor3 out of 5', nan, nan]
[' Kattigenahalli', ' Ayodaya Nagar', ' Electronic City ', ' Tumkur Road ', ' Kudlu Gate ', ' Hal Stage 2', ' Murugeshpalya', ' Begur Road ', ' Judicial Layout', ' Bellandur', ' Kasturi Nagar ', ' Koramangala Block 1 ', ' Sarjapur Road ', ' Devanahalli ', ' Hanumantha Nagar', ' ', ' Doddakannelli ', ' Electronic City ', ' Sarjapur Road ', ' Jigani ', ' ', ' ', ' Jaya Nagar Block 3 ', ' whitefield ', ' whitefield ', ' Harlur ', ' Sarjapur Road ', ' Hoodi', ' Haralur Ma', ' Yelahanka ', ' Daulat Nagar ', ' Samel Pada ', ' Borivali West ', ' Goregaon West ', ' Vasai East ', ' Worli ', ' Kurla East ', ' Andheri East ', ' Kandivali East ', ' Mahim West ', ' Borivali West ', ' Chembur West ', ' ', ' ', ' Dadar West ', ' Boisar West ', ' Mira Road ', ' K', ' Virar West ', ' Vasai West ', ' Bhandup West ', ' Kalyan ', ' Goregaon East ', ' Marol Maroshi Road ', ' Malad West ', ' Vikhroli East ', ' Juhu ', ' Andheri East ', ' Jogeshwari West ', ' Mulund West ', ' Mannivakkam Chennai', ' New Perungalathur Chennai', ' Kovilambakkam Chennai', ' Perumbakkam Chennai', ' Mogappair Chennai', ' Tambaram West Tambaram Chennai', ' Manapakkam Chennai', ' Mogappair Chennai', ' Perumbakkam Chennai', ' Iyyappanthangal Chennai', ' Manapakkam Chennai', ' Madipakkam Chennai', ' Selaiyur Chennai', ' Velachery Chennai', ' Sithalapakkam Chennai', ' Adyar Chennai', ' Shankar Nagar Pammal Chennai', ' Chennai', ' Porur Chennai', ' Chennai', ' Villivakkam Chennai', ' Kundrathur Chennai', ' AGS Colony Velachery Chennai', ' Pallikaranai Chennai', ' Pozhichalur Chennai', ' Jothi Nagar Chitlapakkam Chennai', ' VGP Layout Palavakkam Chennai', ' Perundevi Ammal Nagar Chennai', ' Gu', ' Chennai', ' ', ' ', ' Madhurwada ', ' Pendurthi ', ' Pithapuram Colony ', ' Madhurwada ', ' Pendurthi ', ' Visalakshi Nagar ', ' Balaji Nagar ', ' Sujatha nagar ', ' ', ' Mural', ' Poth', ' Kurmannapalem ', ' Kurmannapalem ', ' Pendurthi ', ' Kurmannapalem ', ' ', ' ', ' Kancharapalem ', ' Kurmannapalem ', ' Railway New Colony ', ' Gajuwaka ', ' Aganampudi ', ' Madhurwada ', ' Tagarapuvalsa ', ' Parawada ', ' PM Palem ', ' Madhurwada ', ' MVP Colony ']
['bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam']
120
120
120
120
120
120
120
120
In [ ]:
 
In [151]:
# inserting NaN's at index positions
per_sqt.insert(32,np.nan)
per_sqt.insert(39,np.nan)
per_sqt.insert(43,np.nan)
per_sqt.insert(60,np.nan)
per_sqt.insert(67,np.nan)
per_sqt.append(np.nan)
per_sqt.append(np.nan)
per_sqt.append(np.nan)
per_sqt.append(np.nan)
per_sqt.append(np.nan)
In [152]:
# inserting NaN's at index positions
Floor.append(np.nan)
Floor.append(np.nan)
In [153]:
# inserting NaN's at index positions
Carpet_Area.append(np.nan)
In [154]:
# Creation the Dict with cloumns names as strings and values as column names in which values are present.
df1 = {'City':City,
             'Area':Area,
             'Owner':Owner,
             'BHK':BHK,
             'Floor':Floor,
             'price':price,
             'Carpet_Area':Carpet_Area,
             'per_sqt':per_sqt
}
In [156]:
Project1 = pd.DataFrame(df1)
In [158]:
Project1.head(20)
Out[158]:
City Area Owner BHK Floor price Carpet_Area per_sqt
0 bangalore Kattigenahalli Omkar Pandey 3 FloorGround out of 4 ₹60 Lac 1000 ₹4950
1 bangalore Ayodaya Nagar gokul 2 FloorGround out of 4 ₹80 Lac 1150 ₹6957
2 bangalore Electronic City Thayumanavan 2 Floor4 out of 4 ₹48 Lac 1060 ₹4528
3 bangalore Tumkur Road Prasanna 3 Floor14 out of 15 ₹1.35 Cr 1790 ₹7542
4 bangalore Kudlu Gate Akash Akash 2 Floor2 out of 4 ₹61 Lac 803 ₹5706
5 bangalore Hal Stage 2 av nath 3 Floor1 out of 4 ₹1.30 Cr 1640 ₹7303
6 bangalore Murugeshpalya Sangeeta Pillai 2 Floor3 out of 5 ₹50 Lac 1250 ₹5000
7 bangalore Begur Road ganga k 3 Basement out of 17 ₹1.80 Cr 1588 ₹11335
8 bangalore Judicial Layout Anjan K 3 Floor3 out of 14 ₹1 Cr 1000 ₹8889
9 bangalore Bellandur Omesh Saraf 2 Floor2 out of 10 ₹1.02 Cr 1465 ₹6997
10 bangalore Kasturi Nagar Mohd Hussain 3 FloorGround out of 7 ₹1.45 Cr 1500 ₹8146
11 bangalore Koramangala Block 1 Rahul Jain 3 Floor16 out of 16 ₹3.85 Cr 1418 ₹20632
12 bangalore Sarjapur Road Gaurav Kumar GAURAV 3 FloorGround out of 5 ₹70 Lac 1350 ₹5655
13 bangalore Devanahalli Ancy 2 Floor1 out of 7 ₹85 Lac 1000 ₹6071
14 bangalore Hanumantha Nagar Anoop 2 Floor2 out of 4 ₹98 Lac 1250 ₹9800
15 bangalore geetha 2 Floor2 out of 5 ₹60 Lac 857 ₹6367
16 bangalore Doddakannelli Omkar Omkar 2 Floor3 out of 6 ₹78 Lac 753 ₹5158
17 bangalore Electronic City ruchika 2 Floor1 out of 7 ₹57 Lac 1186 ₹6374
18 bangalore Sarjapur Road jeswanth 3 Floor1 out of 4 ₹1.08 Cr 1250 ₹4553
19 bangalore Jigani vikas saxena 3 Floor6 out of 12 ₹57 Lac 770 ₹4545
In [169]:
# conversion of DataFrame to csv
Project1.to_csv('DF1.csv')
In [160]:
# Code for the collection of the data
# here list of 3 cities are taken and collection of data is done for them
cities = ['ranchi','haridwar','vadodara']
price=[]
per_sqt=[]
BHK=[]
Carpet_Area=[]
Owner=[]
Floor=[]
Area=[]
City=[]
for j in cities:
    url = "https://www.magicbricks.com/flats-in-"+j+"-for-sale-pppfs"
    print(url)
    page = requests.get(url)
    soup = BeautifulSoup(page.text)
    
    for i in soup.find_all("div",class_="mb-srp__card__price--amount"):
        price.append(i.text)
        
    for i in soup.find_all("div",class_="mb-srp__card__price--size"):
        per_sqt.append(i.text.split(' ')[0])
        
    for i in soup.find_all("h2",class_="mb-srp__card--title"):
        BHK.append(i.text[0])
        
    for i in soup.find_all("div",class_="mb-srp__card__summary--value"):
        a =i.text
        b = re.findall(r"(\d+)\ssqft",str(a))
        if b:
            Carpet_Area.append(b[0])
    for i in soup.find_all('div',class_='mb-srp__card__ads--name'):
        Owner.append(i.text.split(':')[1])    

            
    for i in soup.find_all("div",class_="mb-srp__card__summary__list--item"):
        a = i.text
        b = re.findall(r"\w+\s\w+\s\w+\s\d+",a)
        if b:
            Floor.append(b[0])
            
    for i in soup.find_all("h2",class_="mb-srp__card--title"):
        Area.append(i.text.split('in')[1].split(',')[0])
        City.append(j)
    print(i,'---->', len(Owner))
    print(i,'---->', len(Floor))
    print(i,'---->', len(per_sqt))
https://www.magicbricks.com/flats-in-ranchi-for-sale-pppfs
<h2 class="mb-srp__card--title">3 BHK Flat for Sale in Hawai Nagar , Ranchi</h2> ----> 30
<h2 class="mb-srp__card--title">3 BHK Flat for Sale in Hawai Nagar , Ranchi</h2> ----> 30
<h2 class="mb-srp__card--title">3 BHK Flat for Sale in Hawai Nagar , Ranchi</h2> ----> 28
https://www.magicbricks.com/flats-in-haridwar-for-sale-pppfs
<h2 class="mb-srp__card--title">1 BHK Flat for Sale in Sidcul , Haridwar</h2> ----> 60
<h2 class="mb-srp__card--title">1 BHK Flat for Sale in Sidcul , Haridwar</h2> ----> 60
<h2 class="mb-srp__card--title">1 BHK Flat for Sale in Sidcul , Haridwar</h2> ----> 57
https://www.magicbricks.com/flats-in-vadodara-for-sale-pppfs
<h2 class="mb-srp__card--title">4 BHK Flat for Sale in , Vadodara</h2> ----> 90
<h2 class="mb-srp__card--title">4 BHK Flat for Sale in , Vadodara</h2> ----> 89
<h2 class="mb-srp__card--title">4 BHK Flat for Sale in , Vadodara</h2> ----> 83
In [163]:
# inserting NaN's at index positions
Floor.append(np.nan)
Carpet_Area.append(np.nan)
In [162]:
# inserting NaN's at index positions
per_sqt.insert(11,np.nan)
per_sqt.insert(25,np.nan)
per_sqt.insert(53,np.nan)
per_sqt.insert(71,np.nan)
per_sqt.insert(72,np.nan)
per_sqt.insert(79,np.nan)
per_sqt.insert(84,np.nan)
In [164]:
# printing to see the values are obtained are not
print(price)
print(per_sqt)
print(BHK)
print(Carpet_Area)
print(Owner)
print(Floor)
print(Area)
print(City)

# Finding the length to see is all the lengths are same
print(len(price))
print(len(per_sqt))
print(len(BHK))
print(len(Carpet_Area))
print(len(Owner))
print(len(Floor))
print(len(Area))
print(len(City))
['₹62 Lac ', '₹76.4 Lac ', '₹77 Lac ', '₹31.5 Lac ', '₹68.6 Lac ', '₹75 Lac ', '₹40 Lac ', '₹58 Lac ', '₹60 Lac ', '₹56 Lac ', '₹51 Lac ', '₹77 Lac ', '₹31 Lac ', '₹33 Lac ', '₹1.15 Cr ', '₹68 Lac ', '₹82 Lac ', '₹32 Lac ', '₹58 Lac ', '₹45 Lac ', '₹45 Lac ', '₹45 Lac ', '₹1.25 Cr ', '₹40 Lac ', '₹48 Lac ', '₹1.05 Cr ', '₹60 Lac ', '₹91 Lac ', '₹48 Lac ', '₹1.25 Cr ', '₹41 Lac ', '₹77 Lac ', '₹23.5 Lac ', '₹58 Lac ', '₹14.3 Lac ', '₹1.90 Cr ', '₹38 Lac ', '₹32 Lac ', '₹50.5 Lac ', '₹47 Lac ', '₹28 Lac ', '₹55 Lac ', '₹40 Lac ', '₹35 Lac ', '₹48 Lac ', '₹58 Lac ', '₹16.5 Lac ', '₹25 Lac ', '₹20 Lac ', '₹16 Lac ', '₹67 Lac ', '₹25 Lac ', '₹28 Lac ', '₹37 Lac ', '₹40 Lac ', '₹66 Lac ', '₹14 Lac ', '₹34 Lac ', '₹65 Lac ', '₹22 Lac ', '₹60 Lac ', '₹80 Lac ', '₹22 Lac ', '₹31 Lac ', '₹25 Lac ', '₹23 Lac ', '₹15.5 Lac ', '₹45 Lac ', '₹79 Lac ', '₹45 Lac ', '₹30 Lac ', '₹7.5 Lac ', '₹45 Lac ', '₹35 Lac ', '₹65 Lac ', '₹47 Lac ', '₹19.5 Lac ', '₹25 Lac ', '₹31 Lac ', '₹35 Lac ', '₹25 Lac ', '₹90 Lac ', '₹50 Lac ', '₹82 Lac ', '₹19 Lac ', '₹60 Lac ', '₹8 Lac ', '₹27 Lac ', '₹42 Lac ', '₹1.20 Cr ']
['₹4655', '₹4200', '₹5325', '₹5497', '₹4400', '₹3731', '₹3774', '₹4531', '₹4317', '₹3733', '₹3682', nan, '₹2897', '₹3986', '₹5476', '₹5207', '₹5325', '₹2712', '₹4629', '₹4110', '₹4039', '₹4274', '₹5556', '₹3448', '₹3840', nan, '₹4484', '₹5170', '₹4800', '₹8333', '₹2522', '₹4529', '₹2901', '₹5273', '₹2192', '₹5429', '₹3140', '₹2960', '₹3483', '₹3167', '₹2605', '₹3963', '₹3636', '₹3500', '₹3967', '₹4531', '₹3113', '₹3846', '₹2857', '₹2133', '₹4295', '₹3125', '₹3230', nan, '₹3687', '₹3667', '₹2318', '₹3716', '₹3824', '₹2933', '₹2953', '₹5333', '₹2444', '₹3875', '₹2941', '₹3286', '₹2053', '₹2970', '₹4158', '₹2093', '₹2542', nan, nan, '₹3182', '₹3683', '₹3298', '₹1696', '₹2551', '₹2818', nan, '₹2174', '₹3817', '₹5556', '₹3216', nan, '₹2521', '₹4000', '₹3000', '₹2545', '₹4615']
['3', '3', '3', '1', '3', '3', '2', '3', '3', '3', '3', '3', '2', '2', '3', '3', '3', '2', '2', '2', '2', '3', '4', '2', '3', '5', '3', '3', '2', '3', '2', '3', '2', '3', '1', '2', '3', '2', '2', '3', '2', '3', '2', '2', '3', '2', '1', '1', '1', '1', '4', '2', '1', '2', '2', '3', '1', '2', '3', '1', '3', '3', '2', '2', '2', '1', '1', '2', '3', '4', '2', '1', '2', '2', '3', '3', '2', '2', '2', '2', '2', '3', '3', '3', '2', '4', '1', '2', '3', '4']
['1110', '1500', '1150', '573', '1560', '2010', '930', '1280', '1250', '1500', '1385', '1212', '1000', '800', '1925', '1306', '1540', '1180', '971', '950', '1114', '1053', '2250', '950', '1150', '1650', '1338', '1760', '968', '1500', '1490', '750', '1100', '650', '3000', '1210', '719', '1450', '1484', '1075', '1388', '1025', '890', '1050', '500', '530', '650', '700', '650', '1600', '800', '867', '1450', '900', '1800', '604', '915', '1700', '750', '2032', '1500', '900', '780', '1208', '700', '755', '1515', '1900', '1465', '1080', '450', '1010', '850', '1016', '1025', '800', '980', '800', '640', '1150', '2358', '900', '2350', '750', '1580', '200', '600', '1650', '2600', nan]
[' Waseem Ahmad', ' Anuj Krchandra', ' Rabindra Bakshi', ' Abhishek Gupta', ' Shree Ram Developers', ' vivek chandra', ' sushant Bhattacharjee', ' Ritesh Nagpal', ' Nitin Kumar', ' PARTHO BOSE', ' Pradeep', ' rajesh singh', ' ashish singh', ' sanjeev', ' Kshav', ' Abhishek', ' Neeraj', ' Manit Kumar', ' Mukesh Mukesh', ' Samita Chatterjee', ' shakuntala verma', ' vikash singh', ' Avishek Modi Modi', ' Amar nath Pandey', ' MANISH KUMAR', ' taha', ' Rabish Kumar', ' Pintu Singh', ' Ravi Prasad', ' Rajiv Ranjan', ' Kiran Kumar Patel', ' SHREYA SAGAR', ' Krrish', ' Rajendra Wadhwa', ' Shakti Verma', ' Mridul Puri', ' Manish', ' Ajay', ' tarun agarwal', ' Arvind Bhardwaj', ' Jasjit Pannu', ' Abhishek Chauhan', ' sandeep', ' &quot;Magicbricks User&quot;', ' Dinesh Kukreti', ' Shalinirahal Banga', ' Alok Singh', ' saba b Khan', ' Rahul kumar', ' SANDEEP KUMAR', ' Sandeep Chaturvedi', ' AYUSH SAXENA', ' puneet', ' Abhishek chaudhary', ' Rishabh Kapoor', ' Surendra Satija', ' Harendra Kumar', ' chandra', ' Rohit', ' Dharmveer', ' Samy', ' Sudhanshu Kavthekar', ' Jaimin Parmar ', ' paresh', ' Mayur Patel', ' Parimal Patel', ' Rakesh', ' Bhavik Bhavik Mehta', ' Jyotsna Khoda', ' raghuvirsinh Vaghela', ' Pratik gosaliya', ' Umakant parmar', ' Akshat Dani', ' Nimisha', ' Hitansh Agrawal', ' anil', ' mitul patel', ' anil makvana', ' Manubhai Shah', ' Shashank', ' Pushpita Roy Choudhury', ' namita Namita', ' Deepak Singh', ' Manish Nagarajan', ' Ankur Javia', ' Jigar', ' Maheshbhai Dhokia', ' Aniruddha', ' Bindi shah', ' Jiten chokshi']
['Floor3 out of 4', 'Floor5 out of 6', 'Floor4 out of 4', 'Floor2 out of 4', 'Floor8 out of 12', 'Floor2 out of 4', 'Floor1 out of 4', 'Floor2 out of 7', 'Floor4 out of 4', 'Floor1 out of 1', 'Floor3 out of 4', 'Floor4 out of 6', 'Floor1 out of 4', 'Floor3 out of 3', 'Floor9 out of 10', 'Floor4 out of 4', 'Floor4 out of 5', 'Floor3 out of 4', 'FloorGround out of 6', 'Floor3 out of 4', 'Floor3 out of 3', 'Floor1 out of 11', 'Floor3 out of 4', 'Floor3 out of 3', 'Floor2 out of 3', 'FloorGround out of 8', 'Floor1 out of 4', 'Floor4 out of 4', 'Floor1 out of 4', 'Floor4 out of 8', 'FloorGround out of 4', 'FloorGround out of 7', 'Floor6 out of 8', 'Floor1 out of 5', 'Floor1 out of 4', 'FloorGround out of 2', 'Floor2 out of 7', 'Floor3 out of 7', 'Floor6 out of 6', 'Floor1 out of 1', 'Floor4 out of 6', 'Floor2 out of 5', 'FloorGround out of 4', 'Floor3 out of 7', 'Floor5 out of 7', 'FloorGround out of 1', 'FloorGround out of 3', 'FloorGround out of 4', 'Floor3 out of 4', 'Floor1 out of 4', 'Floor2 out of 4', 'Floor3 out of 3', 'Floor4 out of 4', 'Floor3 out of 3', 'Floor5 out of 12', 'Floor4 out of 5', 'Floor2 out of 2', 'Floor3 out of 7', 'Floor2 out of 7', 'Floor4 out of 6', 'Floor5 out of 5', 'Floor3 out of 4', 'Floor3 out of 4', 'Floor1 out of 3', 'Floor5 out of 5', 'Floor2 out of 4', 'Floor6 out of 8', 'Floor3 out of 4', 'Floor4 out of 5', 'Floor1 out of 5', 'Floor3 out of 4', 'Floor1 out of 3', 'Floor3 out of 5', 'Floor5 out of 12', 'Floor4 out of 9', 'Floor2 out of 5', 'Floor2 out of 8', 'Floor1 out of 3', 'Floor4 out of 5', 'Floor4 out of 4', 'Floor4 out of 9', 'Floor5 out of 5', 'Floor9 out of 9', 'Floor3 out of 4', 'Floor5 out of 5', 'Floor2 out of 4', 'Floor1 out of 5', 'Floor3 out of 5', 'Floor2 out of 2', nan]
[' H', ' ', ' S', ' Ratu Road ', ' Harihar S', ' Bariatu Road ', ' Morabadi ', ' Bahu Bazar ', ' ', ' Vikas Nagar ', ' Bariatu ', ' Hesag ', ' ', ' Barga', ' Sector 2 Masibari ', ' Upper Chutia ', ' Hazaribag Road ', ' Namkum ', ' Lalpur ', ' Hesag ', ' Kokar ', ' BIT Mesra ', ' ', ' Harihar S', ' H', ' ', ' Namkum ', ' ', ' Bariatu Road ', ' Hawai Nagar ', ' Sidcul ', ' Roshanabad ', ' Sidcul ', ' Devpura ', ' Laksar Road ', ' Arya Nagar ', ' Jwalapur ', ' Roshanabad ', ' Patanjali ', ' Rajaji National Park ', ' Roshanabad ', ' Jwalapur ', ' Jwalapur ', ' Jwalapur ', ' ', ' Roorkee ', ' Patanjali ', ' ', ' Shivalik Nagar ', ' Shantikunj ', ' Haripur Kalan ', ' ', ' Kankhal ', ' Jwalapur ', ' Patanjali ', ' NH-58 ', ' Jwalapur ', ' Sidcul ', ' NH-58 ', ' Sidcul ', ' Vasna Bhayli Ma', ' Race Course circle ', ' Ajwa Road ', ' ', ' Makar Pura ', ' Chhani ', ' Suryanagar ', ' Gotri Road Gotri ', ' Vasant Vihar ', ' Sevasi ', ' Chhani ', ' Nani Bapod ', ' Race Course circle ', ' Kalali ', ' Sama Savli Road ', ' Gorwa ', ' Bill ', ' Vasna Bhayli Ma', ' Subhanpura ', ' Bhayli ', ' Danteshwar ', ' Vasna Bhayli Ma', ' Gotri Sevasi Road ', ' Harni Road ', ' Sayajipura ', ' Bill ', ' ', ' Vasna Bhayli Ma', ' ', ' ']
['ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara']
90
90
90
90
90
90
90
90
In [165]:
# Creation the Dict with cloumns names as strings and values as column names in which values are present.
df2 = {'City':City,
             'Area':Area,
             'Owner':Owner,
             'BHK':BHK,
             'Floor':Floor,
             'price':price,
             'Carpet_Area':Carpet_Area,
             'per_sqt':per_sqt
}
In [166]:
# Creation of DataFrame
Project2=pd.DataFrame(df2)
In [172]:
Project2.head(20)
Out[172]:
City Area Owner BHK Floor price Carpet_Area per_sqt
0 ranchi H Waseem Ahmad 3 Floor3 out of 4 ₹62 Lac 1110 ₹4655
1 ranchi Anuj Krchandra 3 Floor5 out of 6 ₹76.4 Lac 1500 ₹4200
2 ranchi S Rabindra Bakshi 3 Floor4 out of 4 ₹77 Lac 1150 ₹5325
3 ranchi Ratu Road Abhishek Gupta 1 Floor2 out of 4 ₹31.5 Lac 573 ₹5497
4 ranchi Harihar S Shree Ram Developers 3 Floor8 out of 12 ₹68.6 Lac 1560 ₹4400
5 ranchi Bariatu Road vivek chandra 3 Floor2 out of 4 ₹75 Lac 2010 ₹3731
6 ranchi Morabadi sushant Bhattacharjee 2 Floor1 out of 4 ₹40 Lac 930 ₹3774
7 ranchi Bahu Bazar Ritesh Nagpal 3 Floor2 out of 7 ₹58 Lac 1280 ₹4531
8 ranchi Nitin Kumar 3 Floor4 out of 4 ₹60 Lac 1250 ₹4317
9 ranchi Vikas Nagar PARTHO BOSE 3 Floor1 out of 1 ₹56 Lac 1500 ₹3733
10 ranchi Bariatu Pradeep 3 Floor3 out of 4 ₹51 Lac 1385 ₹3682
11 ranchi Hesag rajesh singh 3 Floor4 out of 6 ₹77 Lac 1212 NaN
12 ranchi ashish singh 2 Floor1 out of 4 ₹31 Lac 1000 ₹2897
13 ranchi Barga sanjeev 2 Floor3 out of 3 ₹33 Lac 800 ₹3986
14 ranchi Sector 2 Masibari Kshav 3 Floor9 out of 10 ₹1.15 Cr 1925 ₹5476
15 ranchi Upper Chutia Abhishek 3 Floor4 out of 4 ₹68 Lac 1306 ₹5207
16 ranchi Hazaribag Road Neeraj 3 Floor4 out of 5 ₹82 Lac 1540 ₹5325
17 ranchi Namkum Manit Kumar 2 Floor3 out of 4 ₹32 Lac 1180 ₹2712
18 ranchi Lalpur Mukesh Mukesh 2 FloorGround out of 6 ₹58 Lac 971 ₹4629
19 ranchi Hesag Samita Chatterjee 2 Floor3 out of 4 ₹45 Lac 950 ₹4110
In [168]:
# conversion of DataFrame to csv
Project2.to_csv('DF2.csv')
In [ ]:
 
In [179]:
# Code for the collection of the data
# here list of 7 cities are taken and collection of data is done for them
cities = ['greater-noida','gurgaon','mangalore','raipur','agra','bhiwadi','mysore']
City = []
Area=[]
Owner=[]
BHK=[]
Floor=[]
price=[]
Carpet_Area=[]
per_sqt=[]
for j in cities:
    url = "https://www.magicbricks.com/flats-in-"+j+"-for-sale-pppfs"
    print(url)
    page = requests.get(url)
    soup = BeautifulSoup(page.text)
    
    for i in soup.find_all("div",class_="mb-srp__card__price--amount"):
        price.append(i.text)
        
    for i in soup.find_all("div",class_="mb-srp__card__price--size"):
        per_sqt.append(i.text.split(' ')[0])
        
    for i in soup.find_all("h2",class_="mb-srp__card--title"):
        BHK.append(i.text[0])
        
    for i in soup.find_all("div",class_="mb-srp__card__summary--value"):
        a =i.text
        b = re.findall(r"(\d+)\ssqft",str(a))
        if b:
            Carpet_Area.append(b[0])
    for i in soup.find_all('div',class_='mb-srp__card__ads--name'):
        Owner.append(i.text.split(':')[1])    

            
    for i in soup.find_all("div",class_="mb-srp__card__summary__list--item"):
        a = i.text
        b = re.findall(r"\w+\s\w+\s\w+\s\d+",a)
        if b:
            Floor.append(b[0])
            
    for i in soup.find_all("h2",class_="mb-srp__card--title"):
        Area.append(i.text.split('in')[1].split(',')[0])
        City.append(j)
    print(i,'---->', len(Owner))
    print(i,'---->', len(Floor))
    print(i,'---->', len(per_sqt))
https://www.magicbricks.com/flats-in-greater-noida-for-sale-pppfs
<h2 class="mb-srp__card--title">3 BHK Flat for Sale in Yamuna Expressway , Greater Noida</h2> ----> 29
<h2 class="mb-srp__card--title">3 BHK Flat for Sale in Yamuna Expressway , Greater Noida</h2> ----> 31
<h2 class="mb-srp__card--title">3 BHK Flat for Sale in Yamuna Expressway , Greater Noida</h2> ----> 28
https://www.magicbricks.com/flats-in-gurgaon-for-sale-pppfs
<h2 class="mb-srp__card--title">3 BHK Flat for Sale in Sector 65 , Gurgaon</h2> ----> 59
<h2 class="mb-srp__card--title">3 BHK Flat for Sale in Sector 65 , Gurgaon</h2> ----> 64
<h2 class="mb-srp__card--title">3 BHK Flat for Sale in Sector 65 , Gurgaon</h2> ----> 56
https://www.magicbricks.com/flats-in-mangalore-for-sale-pppfs
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in Yeyyadi Indl. Estate , Mangalore</h2> ----> 89
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in Yeyyadi Indl. Estate , Mangalore</h2> ----> 93
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in Yeyyadi Indl. Estate , Mangalore</h2> ----> 82
https://www.magicbricks.com/flats-in-raipur-for-sale-pppfs
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in Daldal Seoni , Raipur</h2> ----> 119
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in Daldal Seoni , Raipur</h2> ----> 123
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in Daldal Seoni , Raipur</h2> ----> 112
https://www.magicbricks.com/flats-in-agra-for-sale-pppfs
<h2 class="mb-srp__card--title">3 BHK Flat for Sale in Rakabganj , Agra</h2> ----> 149
<h2 class="mb-srp__card--title">3 BHK Flat for Sale in Rakabganj , Agra</h2> ----> 153
<h2 class="mb-srp__card--title">3 BHK Flat for Sale in Rakabganj , Agra</h2> ----> 142
https://www.magicbricks.com/flats-in-bhiwadi-for-sale-pppfs
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in Alwar Bypass Road , Bhiwadi</h2> ----> 179
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in Alwar Bypass Road , Bhiwadi</h2> ----> 184
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in Alwar Bypass Road , Bhiwadi</h2> ----> 172
https://www.magicbricks.com/flats-in-mysore-for-sale-pppfs
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in Sriramapura , Mysore</h2> ----> 209
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in Sriramapura , Mysore</h2> ----> 213
<h2 class="mb-srp__card--title">2 BHK Flat for Sale in Sriramapura , Mysore</h2> ----> 198
In [ ]:
 
In [184]:
# printing to see the values are obtained are not
print(City)
print(Area)
print(BHK)
print(Carpet_Area)
print(Owner)
print(price)
print(per_sqt)
print(Floor)

# Finding the length to see is all the lengths are same
print(len(City))
print(len(Area))
print(len(BHK))
print(len(Carpet_Area))
print(len(Owner))
print(len(price))
print(len(per_sqt))
print(len(Floor))
['greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore']
[' Zeta 1 ', ' Sector P4 ', ' Eta 2', ' Sector 3 ', ' Zeta 1 ', ' Omicron ', ' Greater Noida West ', ' Eta 2', ' Yamuna Expressway ', ' Greater Noida West ', ' ', ' Chi 5 ', ' Jaypee Greens ', ' Zeta 1 ', ' Greater Noida West ', ' Omicron 1 ', ' Greater Noida West ', ' Sector 1 Greater Noida West ', ' Greater Noida West ', ' Greater Noida West ', ' Greater Noida West ', ' ', ' Noida Extention ', ' Sector 1 Greater Noida West ', ' Omicron 3 ', ' UPSIDC Site C Block H ', ' Sector 16 ', ' Chi 5 ', ' Yamuna Expressway ', ' Yamuna Expressway ', ' Jal Vayu Vihar', ' Sohna Sector 32 ', ' Kendriya Vihar', ' Sector 23 ', ' Sohna Sector 35 ', ' Sector 84 ', ' Sector 104 ', ' Sector 78 ', ' Sector 112 ', ' Sector 102 ', ' Sector 48 ', ' Sector 7 ', ' Sector 55 ', ' DLF City Phase 1 ', ' Sector 57 ', ' New Colony', ' Sohna Sector 33 ', ' Sector 83 ', ' Sector 47 ', ' Sector 37C', ' Sector 9 ', ' DLF City Phase 1 ', ' Sector 4 ', ' South City 2', ' New Palam Vihar', ' Sector 54 ', ' Patel Nagar', ' Sector 77', ' Sector 65 ', ' Sector 65 ', ' Shakti Nagar ', ' Urwa ', ' Bunts Hostel Road ', ' Urwa ', ' Kulshekar ', ' Kulai ', ' Bajpe ', ' Ujire ', ' Bondel ', ' Kateel ', ' K', ' Shakti Nagar ', ' Falnir ', ' Kulai ', ' Kadri ', ' Padil ', ' Kulai ', ' Urwa ', ' Falnir ', ' Jepp', ' Derebail ', ' Mulky ', ' ', ' Pandeshwar ', ' Ashok Nagar ', ' Konchady Cross Road ', ' Pumpwell ', ' Mukka ', ' Nandigudda ', ' Yeyyadi Indl. Estate ', ' Daldal Seoni ', ' Mowa ', ' Avanti Vihar ', ' Shankar Nagar ', ' Santoshi Nagar ', ' Naya Raipur ', ' Hirapur Road ', ' VIP Road ', ' Mathpurena ', ' Boria Kalan ', ' Amlidhi ', ' Bhatagaon ', ' Kabir Nagar ', ' Shankar Nagar ', ' Hirapur Road ', ' NH 6 ', ' Shankar Nagar ', ' Deopuri ', ' Kota ', ' Amlidhi ', ' Hirapur Road ', ' Santoshi Nagar ', ' Boria Kalan ', ' Shankar Nagar ', ' Amleshwar ', ' Amlidhi ', ' Kachna Road ', ' Mowa ', ' Amanaka ', ' Daldal Seoni ', ' Sikandra ', ' Vibhav Nagar ', ' Vijay Nagar Colony ', ' Civil L', ' Dayal Bagh ', ' Civil L', ' Shastri Puram ', ' Agra Shamshabad Raja Kherah Marg ', ' Dayal Bagh ', ' Dayal Bagh ', ' Dayal Bagh ', ' Fatehabad Road ', ' Agra Shamshabad Raja Kherah Marg ', ' Sikandra ', ' Sikandra ', ' Dayal Bagh ', ' Sikandra ', ' Fatehabad Road ', ' Shastri Puram ', ' Shastri Puram ', ' Fatehabad Road ', ' Sikandra ', ' Shahganj ', ' Fatehabad Road ', ' Gwalior Road ', ' Agra Shamshabad Raja Kherah Marg ', ' Sector 16B Awas Vikas Colony ', ' Fatehabad Road ', ' Shastri Puram ', ' Rakabganj ', ' Alwar Bypass Road ', ' ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Bhiwadi Mod ', ' Tapukara ', ' Alwar Bhiwadi Road ', ' Vasundhara Nagar ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Rampura ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Vasundhara Nagar ', ' Tapukara ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Gokulam ', ' Bogadi ', ' JP Nagar ', ' Yadavgiri ', ' Yadavgiri ', ' Hebbal ', ' Vijayanagar 4th Stage ', ' Bogadi ', ' Sharadadevi Nagar ', ' Alanahalli ', ' Vijaynagar 3rd Stage ', ' Belagola ', ' ', ' Visveshwara Nagar ', ' ', ' Siddhartha Layout ', ' Vidyarayanapuram ', ' Srirangapatnam ', ' JP Nagar ', ' Bogadi ', ' Chamarajapura ', ' Metagalli ', ' Jaya Laxmi Puram ', ' Hebbal 2nd Stage ', ' Lakshmipuram ', ' ', ' Kuvempunagar ', ' ', ' V V Mohalla ', ' Sriramapura ']
['2', '3', '3', '2', '2', '3', '4', '2', '1', '2', '2', '4', '4', '4', '2', '2', '3', '2', '3', '2', '2', '2', '3', '2', '3', '2', '3', '3', '2', '3', '2', '3', '3', '2', '1', '3', '2', '3', '3', '4', '3', '3', '3', '4', '3', '3', '2', '3', '3', '2', '4', '2', '4', '4', '2', '3', '3', '4', '3', '3', '2', '2', '3', '2', '3', '2', '4', ' ', '3', '2', '1', '2', '3', '2', '2', '3', '3', '2', '2', '2', '2', '2', '2', '2', '3', '2', '3', '2', '3', '2', '2', '3', '3', '2', '3', '2', '3', '2', '2', '2', '2', '2', '1', '2', '2', '3', '2', '3', '3', '2', '3', '3', '3', '3', '3', '2', '3', '2', '1', '2', '2', '3', '2', '2', '5', '2', '2', '3', '3', '3', '3', '2', '2', '2', '2', '3', '5', '3', '2', '2', '3', '3', '2', '3', '2', '2', '2', '3', '2', '3', '2', '2', '2', '1', '2', '2', '2', '2', '1', '1', '3', '1', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '3', '3', '2', '1', '2', '3', '4', '2', '2', '2', '1', '2', '3', '4', '2', '2', '4', '2', '2', '2', '2', '3', '3', '3', '3', '2', '3', '3', '2', '3', '2', '2', '2', '2', '2']
['1200', '1120', '995', '970', '895', '1775', '2364', '660', nan, '950', '995', '3210', '3441', '2200', '910', '840', '1530', '862', '1055', '615', '1155', '950', '840', '870', '1395', '1115', '1285', '1494', '1115', '13750', '900', '1789', '1500', '750', '310', '1750', '570', '1657', '3763', '1800', '1300', '900', '1950', '1556', '1217', nan, '610', '1745', '2470', '10372', '1450', '1250', '1550', '1800', '550', '3650', '960', '1500', '1500', '3069', '670', '1100', '1700', '1275', '1330', '906', '3030', nan, '1390', '1260', '660', '1166', '3900', '975', '1390', '1070', '1870', '1040', '1000', '1320', '783', '72', '1000', '1330', '1356', '884', '1613', '1100', '1341', '1200', '970', '1200', '1985', '840', '1250', '943', '799', '900', '1050', '487', '700', '815', '505', '866', '721', '1200', '1030', '1500', '1805', '1230', '965', '1250', '1337', '960', '1200', '1000', '1500', '860', '550', '906', '900', '1540', '1250', '850', nan, '1050', '848', '1350', '1050', '1400', nan, '1192', '1115', '1000', '660', '1675', '2410', nan, '862', '1250', '1400', '1800', '1750', '1675', '960', '940', '1425', '926', '861', '1500', '1165', '555', '980', '466', '1234', '750', '780', '650', '630', '600', '1430', '660', '815', '1250', '1170', '1111', '1000', '1150', '829', '700', '950', '1250', '1150', '599', '750', '1590', '1522', '500', '630', '790', '1650', '1487', '1095', '9', '1280', '400', '1400', '1174', '650', '1050', '890', '2000', '911', '1280', '800', '1095', '1553', '1875', '1681', '1400', '1019', '2200', '1380', '1100', '1650', '1070', '1080', '1200', '900', '1115']
[' RAHUL TRIVEDI', ' R Bhattacharjee', ' Ashutosh', ' Sandeep gautam', ' vijay', ' PRASAD', ' Kamal Bansal', ' Avik Dey', ' Harkesh Sharma', ' Bhagat Singh', ' Mangat Rai', ' AN SINHA', ' Nihit', ' Shiv Kumar', ' Right Value Solutions', ' Adhunik Propmart', ' Homes And Desire', ' Investors Lab', ' Vardhman Reality', ' piyush nair', ' A Plus Associates', ' Bricksnwall Innovations Pvt.Ltd.', ' Pragati ', ' Prof Arora', ' Prabhat Sharma', ' Gaurav', ' sanjeev', ' Mohd Sarfaraz', ' Atif', ' Sandeep Jain', ' Mohit', ' Nithin Abraham', ' Arun Yadav', ' Lionel Richie', ' Shubham Solanki', ' Anshul', ' Ajay Sharma', ' Pushpendra Sethi', ' Y S Dwivedi', ' Rahul Arora', ' Devesh', ' RAHUL', ' Kapur', ' sanjay kaushik', ' Naresh', ' Rahul', ' Karun', ' Gurdeep Singh', ' Pradeep Sharma', ' Ravinder Chauhan', ' Axiom Landbase Pvt. Ltd.', ' Jk Batra', ' sunil yadav', ' Rishabh Arora', ' Pankaj Ahlawat', ' VIVEK SHARMA', ' Real Estate Gurgaon', ' Akhilesh Sharma ', ' Ankush', ' Manjunath S', ' Govinda', ' Varun Nayak', ' Jeanson Veigas', ' Chethan', ' Naveen Kamath', ' Devdhar Shetty', ' Gopal', ' Stany', ' Brahmari Vilas', ' Baptist Dcunha', ' Priya', ' Iqbal ', ' Santosh Babu Salian', ' Hassan', ' Rajesh Antony Fernandes', ' jaysen', ' Govinda Sharma', ' hashura', ' Lance', ' Arwin', ' sharath kumar', ' Sameer', ' Sabeena Farhath', ' Suman Kumar', ' pkumar', " James D'SA", ' roshan', ' Sunil Veigas', ' SNEHA', ' D Suryaprakash', ' Bhumika Bansal', ' carlus', ' Vaibhav Shukla', ' Khushboo Kashyap', ' Deekaha', ' Divya Singh', ' Arpit Jain', ' Mahesh', ' geeta shrivastava', ' ajit singh', ' Prem Pvt Ltd', ' vijay', ' Siddharth Buildcon', ' Kusum kumari', ' Priyesh Gupta', ' Devinder Singh', ' womesh womesh', ' sandip Agarwal', ' Chainika', ' Karnjeet Biswas', ' TEJESHWARI SAHU', ' Mitchell', ' Angad Lalwani', ' achyutananda', ' Manoj mati', ' vidyadhar behra', ' Sumit Wadhwani', ' Saurabh upadhyay', ' Shubham Soni', ' Vikas Gulati', ' vishal sharma', ' Arnav Singh', ' Vinod Kumar', ' Deepika Chahar', ' Shikha', ' Shubham Kumar', ' hemant sharma', ' Prem lal Taneja', ' Yatendra kumar mehrotra', ' Umang Mathur', ' Akshit Rajoriya', ' Sanjeev Singh', ' nitin singh', ' ajay kumar', ' Divya pandey', ' GALAXY PRINT PACK', ' SHWETANG SHARMA', ' PRAVEEN KUMAR', ' Sapna', ' Ganpati World', ' Shrey Shankar', ' Rakesh Agarwal ', ' Sanchit gupta', ' PRASHANT', ' adil hussain', ' Mohini Sharma', ' vivek jain', ' VIMAL TIWARI', ' Dr Archika Gupta Dr Archika', ' Amit Kumar Saini', ' Radha Kanha', ' M K Realtech', ' R K Real Estate', ' Vikram', ' Amita Singh', ' Raj Singh', ' Dharm Chauhan', ' Sunil Kumar', ' vineet', ' Sandeep singh', ' Ashok Sood', ' munish', ' Brahmprakash', ' Satish', ' Sachinrajput', ' Sunil Kumar', ' Shailender', ' Sumit Kumar', ' Rahul Rijhwani', ' darshan kumar', ' Chandresh', ' Rahul Chauhan', ' Narendra Kumar Agarwal', ' Vivek Anand Semwal', ' Vibhu', ' SANCHITA', ' kuldeep tomer', ' Parveen khanna', ' Pardeep N/A ', ' Vijay', ' Ravi Nambiar', ' Pick Your Prop Estates LLP', ' Veena', ' Amar Kumar Prasad', ' Jagadeesha', ' Meghana', ' Shashi', ' Satish Krishna', ' ARUN', ' subbareddy subbareddy', ' Mallesh', ' Karthik', ' Nanda', ' Mohammed Mansoor', ' Shriya Mankani', ' Nataraj D M', ' Narasimha murthy', ' Vijay Menon', ' Prakash', ' G S ANANTHA', ' Gowrish Bhaskar', ' Ramachandra Murthy', ' Thulaseedharan pillai A', ' Rathan', ' sheelavathi', ' Nayana A.S', ' Srikanth', ' MANJUNATHA', ' Chakko', nan]
['₹47.5 Lac ', '₹46 Lac ', '₹37 Lac ', '₹24 Lac ', '₹48 Lac ', '₹65.4 Lac ', '₹1 Cr ', '₹27 Lac ', '₹15 Lac ', '₹35 Lac ', '₹65 Lac ', '₹2.10 Cr ', '₹3.31 Cr ', '₹98 Lac ', '₹43.5 Lac ', '₹48.5 Lac ', '₹80 Lac ', '₹61.2 Lac ', '₹56 Lac ', '₹50.5 Lac ', '₹63 Lac ', '₹25.5 Lac ', '₹68.9 Lac ', '₹23 Lac ', '₹55 Lac ', '₹44 Lac ', '₹51 Lac ', '₹65 Lac ', '₹33 Lac ', '₹50 Lac ', '₹80 Lac ', '₹1.80 Cr ', '₹1.50 Cr ', '₹45 Lac ', '₹17 Lac ', '₹93 Lac ', '₹35 Lac ', '₹81.1 Lac ', '₹4.60 Cr ', '₹2.60 Cr ', '₹1.20 Cr ', '₹54 Lac ', '₹1.40 Cr ', '₹1.85 Cr ', '₹99 Lac ', '₹85 Lac ', '₹42 Lac ', '₹1 Cr ', '₹3.50 Cr ', '₹93 Lac ', '₹1.20 Cr ', '₹1.32 Cr ', '₹1.60 Cr ', '₹2.35 Cr ', '₹28 Lac ', '₹10.90 Cr ', '₹46 Lac ', '₹1.50 Cr ', '₹2.28 Cr ', '₹5 Cr ', '₹22 Lac ', '₹79 Lac ', '₹69.5 Lac ', '₹58 Lac ', '₹54.8 Lac ', '₹65 Lac ', '₹2 Cr ', '₹10.1 Lac ', '₹65 Lac ', '₹35 Lac ', '₹19 Lac ', '₹40 Lac ', '₹1.80 Cr ', '₹65 Lac ', '₹75 Lac ', '₹48.5 Lac ', '₹90 Lac ', '₹79 Lac ', '₹65 Lac ', '₹50 Lac ', '₹60 Lac ', '₹35 Lac ', '₹32 Lac ', '₹75 Lac ', '₹1.01 Cr ', '₹40 Lac ', '₹1.20 Cr ', '₹45 Lac ', '₹68.5 Lac ', '₹48 Lac ', '₹25 Lac ', '₹77 Lac ', '₹68 Lac ', '₹52 Lac ', '₹29.1 Lac ', '₹31.8 Lac ', '₹32.5 Lac ', '₹30 Lac ', '₹42 Lac ', '₹7.5 Lac ', '₹23 Lac ', '₹31.5 Lac ', '₹14 Lac ', '₹35 Lac ', '₹25 Lac ', '₹33 Lac ', '₹30 Lac ', '₹33.3 Lac ', '₹1.35 Cr ', '₹49 Lac ', '₹30 Lac ', '₹29.1 Lac ', '₹33 Lac ', '₹43.4 Lac ', '₹25 Lac ', '₹36 Lac ', '₹42.9 Lac ', '₹20 Lac ', '₹12.5 Lac ', '₹25 Lac ', '₹25.5 Lac ', '₹43.5 Lac ', '₹68 Lac ', '₹35 Lac ', '₹1.35 Cr ', '₹65 Lac ', '₹44.4 Lac ', '₹33.5 Lac ', '₹42 Lac ', '₹48 Lac ', '₹47 Lac ', '₹40.5 Lac ', '₹28 Lac ', '₹35 Lac ', '₹17.5 Lac ', '₹60 Lac ', '₹81.9 Lac ', '₹55 Lac ', '₹40 Lac ', '₹42 Lac ', '₹44 Lac ', '₹65 Lac ', '₹59.9 Lac ', '₹51 Lac ', '₹24 Lac ', '₹44 Lac ', '₹65 Lac ', '₹24 Lac ', '₹41.3 Lac ', '₹90 Lac ', '₹38 Lac ', '₹12 Lac ', '₹23 Lac ', '₹13 Lac ', '₹36 Lac ', '₹18 Lac ', '₹23 Lac ', '₹13.5 Lac ', '₹11 Lac ', '₹16.8 Lac ', '₹37 Lac ', '₹28 Lac ', '₹15 Lac ', '₹18.8 Lac ', '₹28 Lac ', '₹17.9 Lac ', '₹19 Lac ', '₹27 Lac ', '₹32.5 Lac ', '₹14.5 Lac ', '₹21 Lac ', '₹30 Lac ', '₹22 Lac ', '₹20 Lac ', '₹20 Lac ', '₹59 Lac ', '₹50 Lac ', '₹10 Lac ', '₹18 Lac ', '₹30 Lac ', '₹80 Lac ', '₹60 Lac ', '₹45 Lac ', '₹68 Lac ', '₹95 Lac ', '₹58 Lac ', '₹82 Lac ', '₹58 Lac ', '₹50 Lac ', '₹85 Lac ', '₹58 Lac ', '₹1.01 Cr ', '₹60 Lac ', '₹56 Lac ', '₹48 Lac ', '₹65.7 Lac ', '₹60 Lac ', '₹75 Lac ', '₹75 Lac ', '₹70 Lac ', '₹52 Lac ', '₹1.45 Cr ', '₹85 Lac ', '₹1.63 Cr ', '₹80 Lac ', '₹40 Lac ', '₹69 Lac ', '₹60 Lac ', '₹60 Lac ', '₹72 Lac ']
['₹3958', '₹3286', nan, '₹3719', '₹2474', '₹5363', nan, '₹4230', '₹4091', '₹4805', '₹3684', '₹6533', '₹6542', '₹9001', '₹3912', '₹4780', '₹4663', '₹5229', '₹5322', '₹5308', '₹4630', '₹5020', '₹2429', '₹4700', '₹2370', '₹3943', '₹3946', '₹4351', '₹2960', '₹3636', '₹8889', '₹10061', nan, '₹5294', '₹3864', '₹4439', '₹5000', '₹4894', '₹12224', '₹9630', '₹8000', '₹5143', '₹7179', '₹11889', '₹8135', '₹8213', '₹4421', '₹5731', '₹14170', '₹6503', '₹7500', '₹9429', '₹7111', '₹11190', '₹3733', '₹25952', '₹4182', '₹7692', '₹12473', nan, '₹3284', '₹6371', '₹3971', '₹4549', '₹3487', nan, '₹5882', nan, '₹4676', '₹3070', '₹2879', nan, '₹4615', '₹6667', '₹5396', '₹4533', '₹4813', '₹6371', '₹6500', '₹3788', '₹5911', '₹3507', '₹3200', '₹5639', '₹5913', '₹4525', '₹6366', '₹3750', nan, '₹3491', '₹2577', '₹4968', '₹3426', '₹6190', '₹2327', '₹3372', '₹3368', '₹3333', '₹4000', '₹1540', '₹2706', '₹3099', '₹2772', '₹3000', '₹3467', '₹2538', '₹2913', '₹1852', '₹7479', '₹3984', '₹3109', '₹2327', '₹3143', '₹3600', '₹2778', '₹3600', '₹2860', '₹2326', '₹2273', '₹2759', '₹2833', '₹2753', '₹5440', '₹3097', '₹4500000', '₹6190', '₹3589', '₹2481', '₹4000', '₹3000', '₹4695', '₹3400', '₹2511', '₹3167', '₹2500', '₹3582', '₹3997', '₹2555', '₹3200', '₹3359', '₹2627', '₹3611', '₹3328', '₹3045', '₹2264', '₹3745', '₹4561', '₹2592', '₹3944', '₹6000', '₹4077', '₹1846', '₹1941', '₹2167', '₹2917', '₹2118', '₹2244', '₹2077', '₹1746', '₹2585', '₹2587', '₹4242', '₹1840', '₹1500', '₹2393', '₹1611', '₹1900', '₹2348', '₹2790', '₹1503', '₹2211', '₹2400', '₹1913', '₹2367', '₹2667', '₹3711', '₹3285', '₹1818', '₹2857', '₹3797', '₹3509', '₹4110', '₹7556', '₹6611', nan, '₹4296', '₹5857', '₹4940', '₹4762', '₹6071', '₹5115', '₹4208', '₹6586', '₹4375', '₹3863', '₹4000', nan, nan, '₹4462', '₹5000', '₹5103', '₹6591', '₹5743', '₹14818', '₹4848', '₹6389', '₹5000', nan, '₹6667', '₹6457']
['Floor17 out of 22', 'Floor7 out of 9', 'Floor8 out of 25', 'Floor1 out of 2', 'Floor8 out of 16', 'Floor9 out of 14', 'Floor18 out of 32', 'Floor14 out of 26', 'Floor1 out of 4', 'Floor25 out of 25', 'Floor16 out of 24', 'Floor11 out of 22', 'Floor6 out of 34', 'Floor3 out of 6', 'Floor5 out of 15', 'Floor11 out of 20', 'Floor6 out of 23', 'Floor19 out of 20', 'Floor12 out of 24', 'Floor26 out of 27', 'Floor17 out of 19', 'Floor1 out of 4', 'Floor10 out of 19', 'Floor1 out of 5', 'Floor20 out of 21', 'Floor6 out of 14', 'Floor4 out of 22', 'Floor7 out of 21', 'Express Park View 2', 'Floor25 out of 25', 'Floor22 out of 25', 'Floor3 out of 3', 'Floor10 out of 14', 'Floor3 out of 10', 'SocietyKendriya Vihar Sector 56', 'Floor1 out of 4', 'Floor7 out of 10', 'Floor19 out of 20', 'Floor9 out of 15', 'Floor3 out of 14', 'Monsoon Breeze Phase 2', 'Floor10 out of 27', 'Floor8 out of 22', 'Floor4 out of 4', 'Floor3 out of 4', 'Floor4 out of 9', 'Floor5 out of 6', 'Floor2 out of 3', 'Floor1 out of 4', 'Floor8 out of 10', 'FloorGround out of 4', 'Floor1 out of 18', 'Floor2 out of 14', 'Floor4 out of 9', 'Floor2 out of 3', 'City Plot Phase 1', 'Floor3 out of 4', 'Floor3 out of 4', 'Floor2 out of 4', 'FloorGround out of 32', 'Floor1 out of 3', 'Floor6 out of 12', 'Floor19 out of 47', 'Floor11 out of 14', 'Floor2 out of 2', 'Floor4 out of 5', 'Floor4 out of 8', 'Floor3 out of 4', 'Floor6 out of 14', 'Floor7 out of 25', 'Floor22 out of 23', 'Floor5 out of 5', 'Floor2 out of 4', 'Floor1 out of 4', 'Floor12 out of 14', 'Floor5 out of 6', 'Floor14 out of 23', 'Floor7 out of 13', 'Floor9 out of 9', 'Floor7 out of 19', 'Floor4 out of 5', 'Floor3 out of 4', 'Floor5 out of 5', 'Floor2 out of 5', 'Floor4 out of 5', 'Floor4 out of 4', 'Floor1 out of 16', 'Floor1 out of 4', 'Floor3 out of 5', 'Floor3 out of 5', 'Floor5 out of 5', 'Floor4 out of 4', 'Floor4 out of 4', 'Floor2 out of 5', 'Floor6 out of 6', 'Floor4 out of 6', 'Floor5 out of 5', 'Basement out of 4', 'Floor5 out of 8', 'Floor3 out of 10', 'FloorGround out of 3', 'Floor1 out of 4', 'Floor1 out of 3', 'Floor2 out of 6', 'Floor3 out of 8', 'Floor6 out of 6', 'Floor10 out of 10', 'Floor7 out of 10', 'Floor3 out of 6', 'Floor3 out of 5', 'Floor7 out of 11', 'Floor4 out of 7', 'Floor4 out of 8', 'Floor5 out of 11', 'FloorGround out of 4', 'Floor1 out of 1', 'Floor2 out of 10', 'Floor2 out of 4', 'Floor3 out of 6', 'Floor5 out of 10', 'Floor4 out of 6', 'Floor2 out of 3', 'Floor1 out of 6', 'Floor3 out of 8', 'Floor3 out of 4', 'Floor2 out of 6', 'Floor2 out of 5', 'Floor1 out of 2', 'Floor2 out of 5', 'Floor8 out of 9', 'Floor1 out of 10', 'Floor2 out of 2', 'Floor1 out of 2', 'Floor1 out of 3', 'Floor9 out of 11', 'FloorGround out of 3', 'Floor3 out of 5', 'Floor7 out of 10', 'Floor4 out of 14', 'Floor5 out of 9', 'Floor3 out of 13', 'Floor1 out of 11', 'Floor2 out of 11', 'Floor7 out of 15', 'Floor1 out of 10', 'Floor3 out of 4', 'Floor11 out of 14', 'Floor2 out of 2', 'Floor12 out of 12', 'Floor3 out of 9', 'Floor9 out of 9', 'Floor2 out of 11', 'Floor2 out of 6', 'Floor6 out of 13', 'Floor5 out of 6', 'Floor7 out of 10', 'Floor3 out of 13', 'Floor3 out of 13', 'Floor1 out of 9', 'Floor4 out of 14', 'Floor13 out of 15', 'Basement out of 15', 'Floor9 out of 16', 'Floor2 out of 14', 'Floor2 out of 4', 'Floor8 out of 13', 'Floor15 out of 15', 'Floor5 out of 7', 'Floor4 out of 9', 'Floor9 out of 14', 'Floor13 out of 14', 'Floor11 out of 16', 'Floor11 out of 15', 'SocietyAvalon Residency Phase 2', 'Floor12 out of 15', 'Floor9 out of 15', 'Floor9 out of 12', 'Floor7 out of 14', 'Floor14 out of 14', 'Floor4 out of 9', 'Floor2 out of 5', 'Floor9 out of 14', 'Floor2 out of 13', 'SocietyKrish City Phase 2', 'Floor2 out of 4', 'Floor6 out of 8', 'Floor3 out of 8', 'Floor2 out of 2', 'Floor1 out of 4', 'Floor4 out of 4', 'Floor4 out of 5', 'Floor8 out of 8', 'Floor2 out of 3', 'Floor2 out of 4', 'Floor3 out of 4', 'Basement out of 1', 'Floor1 out of 4', 'Floor3 out of 4', 'Floor1 out of 5', 'Floor2 out of 4', 'Floor3 out of 6', 'FloorGround out of 10', 'Floor3 out of 4', 'Floor7 out of 7', 'Floor4 out of 7', 'FloorGround out of 3', 'Floor1 out of 3', 'FloorGround out of 2', 'FloorGround out of 5', 'Floor1 out of 3', 'Floor3 out of 4', 'Floor3 out of 3', 'FloorGround out of 4']
210
210
210
210
210
210
210
213
In [181]:
# inserting NaN's at index positions
Owner.append(np.nan)
In [182]:
# inserting NaN's at index positions
Carpet_Area.insert(8,np.nan)
Carpet_Area.insert(45,np.nan)
Carpet_Area.insert(67,np.nan)
Carpet_Area.insert(124,np.nan)
Carpet_Area.insert(130,np.nan)
Carpet_Area.insert(137,np.nan)
In [ ]:
 
In [183]:
# inserting NaN's at index positions
per_sqt.insert(5,np.nan)
per_sqt.insert(2,np.nan)
per_sqt.insert(32,np.nan)
per_sqt.insert(59,np.nan)
per_sqt.insert(65,np.nan)
per_sqt.insert(67,np.nan)
per_sqt.insert(71,np.nan)
per_sqt.insert(88,np.nan)
per_sqt.insert(184,np.nan)
per_sqt.insert(196,np.nan)
per_sqt.insert(197,np.nan)
per_sqt.insert(207,np.nan)
In [185]:
# Creation the Dict with cloumns names as strings and values as column names in which values are present.
df3 = {'City':City,
             'Area':Area,
             'Owner':Owner,
             'BHK':BHK,
             'Floor':Floor[0:210],
             'price':price,
             'Carpet_Area':Carpet_Area,
             'per_sqt':per_sqt
}
In [ ]:
 
In [186]:
# creation of DataFrame
Project3=pd.DataFrame(df3)
In [187]:
Project3
Out[187]:
City Area Owner BHK Floor price Carpet_Area per_sqt
0 greater-noida Zeta 1 RAHUL TRIVEDI 2 Floor17 out of 22 ₹47.5 Lac 1200 ₹3958
1 greater-noida Sector P4 R Bhattacharjee 3 Floor7 out of 9 ₹46 Lac 1120 ₹3286
2 greater-noida Eta 2 Ashutosh 3 Floor8 out of 25 ₹37 Lac 995 NaN
3 greater-noida Sector 3 Sandeep gautam 2 Floor1 out of 2 ₹24 Lac 970 ₹3719
4 greater-noida Zeta 1 vijay 2 Floor8 out of 16 ₹48 Lac 895 ₹2474
... ... ... ... ... ... ... ... ...
205 mysore Nayana A.S 2 FloorGround out of 3 ₹40 Lac 1070 ₹6389
206 mysore Kuvempunagar Srikanth 2 Floor1 out of 3 ₹69 Lac 1080 ₹5000
207 mysore MANJUNATHA 2 FloorGround out of 2 ₹60 Lac 1200 NaN
208 mysore V V Mohalla Chakko 2 FloorGround out of 5 ₹60 Lac 900 ₹6667
209 mysore Sriramapura NaN 2 Floor1 out of 3 ₹72 Lac 1115 ₹6457

210 rows × 8 columns

In [188]:
# conversion of DataFrame to csv
Project3.to_csv('DF3.csv')
In [189]:
# DataFrame concatenation into the final DF
FINALPROJECT = pd.concat([Project1,Project2,Project3])
In [190]:
# Resetting the index numbers after formaton of the finam DF
FINALPROJECT = FINALPROJECT.reset_index()
In [191]:
# Final uncleaned DataFrame
FINALPROJECT
Out[191]:
index City Area Owner BHK Floor price Carpet_Area per_sqt
0 0 bangalore Kattigenahalli Omkar Pandey 3 FloorGround out of 4 ₹60 Lac 1000 ₹4950
1 1 bangalore Ayodaya Nagar gokul 2 FloorGround out of 4 ₹80 Lac 1150 ₹6957
2 2 bangalore Electronic City Thayumanavan 2 Floor4 out of 4 ₹48 Lac 1060 ₹4528
3 3 bangalore Tumkur Road Prasanna 3 Floor14 out of 15 ₹1.35 Cr 1790 ₹7542
4 4 bangalore Kudlu Gate Akash Akash 2 Floor2 out of 4 ₹61 Lac 803 ₹5706
... ... ... ... ... ... ... ... ... ...
415 205 mysore Nayana A.S 2 FloorGround out of 3 ₹40 Lac 1070 ₹6389
416 206 mysore Kuvempunagar Srikanth 2 Floor1 out of 3 ₹69 Lac 1080 ₹5000
417 207 mysore MANJUNATHA 2 FloorGround out of 2 ₹60 Lac 1200 NaN
418 208 mysore V V Mohalla Chakko 2 FloorGround out of 5 ₹60 Lac 900 ₹6667
419 209 mysore Sriramapura NaN 2 Floor1 out of 3 ₹72 Lac 1115 ₹6457

420 rows × 9 columns

In [192]:
# final scraped data ---> uncleaned
FINALPROJECT.to_csv('Magicbrics_Project.csv')
In [ ]:
 

Exploratory Data_Analysis

DataFrame Cleaning

In [ ]:
 
In [2]:
# import libraries
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style('whitegrid')
plt.style.use('bmh')

import warnings
warnings.filterwarnings('ignore')

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

# for HD visualizations
%config InlineBackend.figure_format='retina'
In [3]:
# load the Dataset
dff = pd.read_csv(r'C:\Users\GUDLA RAGUWING\Data Science Course\MagicBrics_Webscraping_project\Magicbrics_Project.csv')
In [5]:
dff.head(20)
Out[5]:
Unnamed: 0 index City Area Owner BHK Floor price Carpet_Area per_sqt
0 0 0 bangalore Kattigenahalli Omkar Pandey 3 FloorGround out of 4 ₹60 Lac 1000.0 ₹4950
1 1 1 bangalore Ayodaya Nagar gokul 2 FloorGround out of 4 ₹80 Lac 1150.0 ₹6957
2 2 2 bangalore Electronic City Thayumanavan 2 Floor4 out of 4 ₹48 Lac 1060.0 ₹4528
3 3 3 bangalore Tumkur Road Prasanna 3 Floor14 out of 15 ₹1.35 Cr 1790.0 ₹7542
4 4 4 bangalore Kudlu Gate Akash Akash 2 Floor2 out of 4 ₹61 Lac 803.0 ₹5706
5 5 5 bangalore Hal Stage 2 av nath 3 Floor1 out of 4 ₹1.30 Cr 1640.0 ₹7303
6 6 6 bangalore Murugeshpalya Sangeeta Pillai 2 Floor3 out of 5 ₹50 Lac 1250.0 ₹5000
7 7 7 bangalore Begur Road ganga k 3 Basement out of 17 ₹1.80 Cr 1588.0 ₹11335
8 8 8 bangalore Judicial Layout Anjan K 3 Floor3 out of 14 ₹1 Cr 1000.0 ₹8889
9 9 9 bangalore Bellandur Omesh Saraf 2 Floor2 out of 10 ₹1.02 Cr 1465.0 ₹6997
10 10 10 bangalore Kasturi Nagar Mohd Hussain 3 FloorGround out of 7 ₹1.45 Cr 1500.0 ₹8146
11 11 11 bangalore Koramangala Block 1 Rahul Jain 3 Floor16 out of 16 ₹3.85 Cr 1418.0 ₹20632
12 12 12 bangalore Sarjapur Road Gaurav Kumar GAURAV 3 FloorGround out of 5 ₹70 Lac 1350.0 ₹5655
13 13 13 bangalore Devanahalli Ancy 2 Floor1 out of 7 ₹85 Lac 1000.0 ₹6071
14 14 14 bangalore Hanumantha Nagar Anoop 2 Floor2 out of 4 ₹98 Lac 1250.0 ₹9800
15 15 15 bangalore geetha 2 Floor2 out of 5 ₹60 Lac 857.0 ₹6367
16 16 16 bangalore Doddakannelli Omkar Omkar 2 Floor3 out of 6 ₹78 Lac 753.0 ₹5158
17 17 17 bangalore Electronic City ruchika 2 Floor1 out of 7 ₹57 Lac 1186.0 ₹6374
18 18 18 bangalore Sarjapur Road jeswanth 3 Floor1 out of 4 ₹1.08 Cr 1250.0 ₹4553
19 19 19 bangalore Jigani vikas saxena 3 Floor6 out of 12 ₹57 Lac 770.0 ₹4545
In [541]:
# Droping the unwnated columns
dff.drop(['Unnamed: 0','index'], axis = 1, inplace = True)
In [542]:
dff.head()
Out[542]:
City Area Owner BHK Floor price Carpet_Area per_sqt
0 bangalore Kattigenahalli Omkar Pandey 3 FloorGround out of 4 ₹60 Lac 1000.00 ₹4950
1 bangalore Ayodaya Nagar gokul 2 FloorGround out of 4 ₹80 Lac 1150.00 ₹6957
2 bangalore Electronic City Thayumanavan 2 Floor4 out of 4 ₹48 Lac 1060.00 ₹4528
3 bangalore Tumkur Road Prasanna 3 Floor14 out of 15 ₹1.35 Cr 1790.00 ₹7542
4 bangalore Kudlu Gate Akash Akash 2 Floor2 out of 4 ₹61 Lac 803.00 ₹5706

Check for duplicated()¶

In [543]:
dff.duplicated()
dff.duplicated().value_counts()
Out[543]:
0      False
1      False
2      False
3      False
4      False
       ...  
415    False
416    False
417    False
418    False
419    False
Length: 420, dtype: bool
Out[543]:
False    420
dtype: int64

No duplicates found in my DataFrame¶

Handling Missing Values¶

Identify Missing Values¶

In [544]:
dff.isna()
Out[544]:
City Area Owner BHK Floor price Carpet_Area per_sqt
0 False False False False False False False False
1 False False False False False False False False
2 False False False False False False False False
3 False False False False False False False False
4 False False False False False False False False
... ... ... ... ... ... ... ... ...
415 False False False False False False False False
416 False False False False False False False False
417 False False False False False False False True
418 False False False False False False False False
419 False False True False False False False False

420 rows × 8 columns

In [545]:
dff.isna().sum()
Out[545]:
City            0
Area            0
Owner           1
BHK             0
Floor           3
price           0
Carpet_Area     8
per_sqt        29
dtype: int64

View the missing values¶

In [546]:
dff[dff.per_sqt.isna()]
Out[546]:
City Area Owner BHK Floor price Carpet_Area per_sqt
32 mumbai Borivali West Sachin Kunder 2 Floor5 out of 15 ₹2.25 Cr 1160.00 NaN
39 mumbai Mahim West Satyajit Satyajit 2 Floor6 out of 12 ₹3.75 Cr 800.00 NaN
43 mumbai Haroon mansuri 2 Floor21 out of 21 ₹60 Lac 1870.00 NaN
60 chennai Mannivakkam Chennai Yogesh Baskaran 2 FloorGround out of 2 ₹55 Lac 800.00 NaN
67 chennai Mogappair Chennai Elango Rajendran 2 Floor16 out of 16 ₹53 Lac 852.00 NaN
115 visakhapatnam Tagarapuvalsa Vision Properties 3 Floor3 out of 5 ₹38.9 Lac 800.00 NaN
116 visakhapatnam Parawada Honey Group 2 Floor5 out of 5 ₹26 Lac 900.00 NaN
117 visakhapatnam PM Palem New Living Properties 2 Floor3 out of 5 ₹42 Lac 650.00 NaN
118 visakhapatnam Madhurwada podilapu simhachalam 2 NaN ₹38 Lac 800.00 NaN
119 visakhapatnam MVP Colony Dinakar 2 NaN ₹78 Lac NaN NaN
131 ranchi Hesag rajesh singh 3 Floor4 out of 6 ₹77 Lac 1212.00 NaN
145 ranchi taha 5 FloorGround out of 8 ₹1.05 Cr 1650.00 NaN
173 haridwar Jwalapur Abhishek chaudhary 2 Floor3 out of 3 ₹37 Lac 900.00 NaN
191 vadodara Nani Bapod Umakant parmar 1 Floor1 out of 3 ₹7.5 Lac 1010.00 NaN
192 vadodara Race Course circle Akshat Dani 2 Floor3 out of 5 ₹45 Lac 850.00 NaN
199 vadodara Bhayli Shashank 2 Floor4 out of 4 ₹35 Lac 1150.00 NaN
204 vadodara Sayajipura Ankur Javia 2 Floor5 out of 5 ₹19 Lac 1580.00 NaN
212 greater-noida Eta 2 Ashutosh 3 Floor8 out of 25 ₹37 Lac 995.00 NaN
216 greater-noida Greater Noida West Kamal Bansal 4 Floor18 out of 32 ₹1 Cr 2364.00 NaN
242 gurgaon Kendriya Vihar Arun Yadav 3 Floor10 out of 14 ₹1.50 Cr 1500.00 NaN
269 gurgaon Sector 65 Manjunath S 3 FloorGround out of 32 ₹5 Cr 3069.00 NaN
275 mangalore Kulai Devdhar Shetty 2 Floor4 out of 5 ₹65 Lac 906.00 NaN
277 mangalore Ujire Stany Floor3 out of 4 ₹10.1 Lac NaN NaN
281 mangalore Shakti Nagar Iqbal 2 Floor5 out of 5 ₹40 Lac 1166.00 NaN
298 mangalore Nandigudda SNEHA 3 Floor3 out of 5 ₹68.5 Lac 1341.00 NaN
394 mysore Yadavgiri Jagadeesha 2 Floor2 out of 4 ₹95 Lac 1280.00 NaN
406 mysore Vidyarayanapuram Narasimha murthy 3 Floor1 out of 4 ₹60 Lac 1553.00 NaN
407 mysore Srirangapatnam Vijay Menon 3 Floor3 out of 4 ₹75 Lac 1875.00 NaN
417 mysore MANJUNATHA 2 FloorGround out of 2 ₹60 Lac 1200.00 NaN
In [547]:
dff[dff.Carpet_Area.isna()]
Out[547]:
City Area Owner BHK Floor price Carpet_Area per_sqt
119 visakhapatnam MVP Colony Dinakar 2 NaN ₹78 Lac NaN NaN
209 vadodara Jiten chokshi 4 NaN ₹1.20 Cr NaN ₹4615
218 greater-noida Yamuna Expressway Harkesh Sharma 1 Floor1 out of 4 ₹15 Lac NaN ₹4091
255 gurgaon New Colony Rahul 3 Floor4 out of 9 ₹85 Lac NaN ₹8213
277 mangalore Ujire Stany Floor3 out of 4 ₹10.1 Lac NaN NaN
334 agra Dayal Bagh Shikha 5 Floor3 out of 4 ₹1.35 Cr NaN ₹4500000
340 agra Dayal Bagh Akshit Rajoriya 3 Floor1 out of 10 ₹47 Lac NaN ₹4695
347 agra Fatehabad Road PRAVEEN KUMAR 3 Floor7 out of 10 ₹55 Lac NaN ₹2555
In [548]:
dff[dff.Floor.isna()]
Out[548]:
City Area Owner BHK Floor price Carpet_Area per_sqt
118 visakhapatnam Madhurwada podilapu simhachalam 2 NaN ₹38 Lac 800.00 NaN
119 visakhapatnam MVP Colony Dinakar 2 NaN ₹78 Lac NaN NaN
209 vadodara Jiten chokshi 4 NaN ₹1.20 Cr NaN ₹4615
In [549]:
dff[dff.Owner.isna()]
Out[549]:
City Area Owner BHK Floor price Carpet_Area per_sqt
419 mysore Sriramapura NaN 2 Floor1 out of 3 ₹72 Lac 1115.00 ₹6457

Identify the corrupted data¶

In [550]:
dff.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 420 entries, 0 to 419
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   City         420 non-null    object 
 1   Area         420 non-null    object 
 2   Owner        419 non-null    object 
 3   BHK          420 non-null    object 
 4   Floor        417 non-null    object 
 5   price        420 non-null    object 
 6   Carpet_Area  412 non-null    float64
 7   per_sqt      391 non-null    object 
dtypes: float64(1), object(7)
memory usage: 26.4+ KB

some columns should be of int and float but not , so we need to investigate why it is happened¶

  • for that draw-out some columns to see what is the issue for the wrong datatype
In [551]:
dff[['BHK','price','Carpet_Area','per_sqt']].iloc[40:50]
Out[551]:
BHK price Carpet_Area per_sqt
40 2 ₹2.70 Cr 450.00 ₹11529
41 1 ₹1.02 Cr 430.00 ₹38459
42 1 ₹95 Lac 675.00 ₹7242
43 2 ₹60 Lac 1870.00 NaN
44 4 ₹15 Cr 820.00 ₹10000
45 2 ₹32 Lac 850.00 ₹14583
46 2 ₹98 Lac 1172.00 ₹5376
47 3 ₹4.89 Cr 404.00 ₹12042
48 1 ₹44.9 Lac 575.00 ₹13669
49 2 ₹70 Lac 507.00 ₹19375
  • It is clearly observed that some columns having NaN's and some having symbols and text are the reason for the wrong datatype

replacing Symbols and text with nothing¶

In [552]:
dff.per_sqt=dff.per_sqt.str.replace('₹', '')
dff.price= dff.price.str.replace('₹','')
dff.price = dff.price.str.replace('Cr','00000')
dff.price = dff.price.str.replace('Lac','00000')
dff.price=dff.price.str.replace('.','')
dff.price=dff.price.str.replace(' ','')
dff.Floor=dff.Floor.str.replace('Floor','')
dff.BHK=dff.BHK.str.replace(' ','3')
In [553]:
dff.rename(columns = {'price':'price_in_crores', 'per_sqt':'per_sqt_rupees'}, inplace = True)
In [554]:
dff.price_in_crores=dff.price_in_crores.astype(int)
In [555]:
dff.head(50)
Out[555]:
City Area Owner BHK Floor price_in_crores Carpet_Area per_sqt_rupees
0 bangalore Kattigenahalli Omkar Pandey 3 Ground out of 4 6000000 1000.00 4950
1 bangalore Ayodaya Nagar gokul 2 Ground out of 4 8000000 1150.00 6957
2 bangalore Electronic City Thayumanavan 2 4 out of 4 4800000 1060.00 4528
3 bangalore Tumkur Road Prasanna 3 14 out of 15 13500000 1790.00 7542
4 bangalore Kudlu Gate Akash Akash 2 2 out of 4 6100000 803.00 5706
5 bangalore Hal Stage 2 av nath 3 1 out of 4 13000000 1640.00 7303
6 bangalore Murugeshpalya Sangeeta Pillai 2 3 out of 5 5000000 1250.00 5000
7 bangalore Begur Road ganga k 3 Basement out of 17 18000000 1588.00 11335
8 bangalore Judicial Layout Anjan K 3 3 out of 14 100000 1000.00 8889
9 bangalore Bellandur Omesh Saraf 2 2 out of 10 10200000 1465.00 6997
10 bangalore Kasturi Nagar Mohd Hussain 3 Ground out of 7 14500000 1500.00 8146
11 bangalore Koramangala Block 1 Rahul Jain 3 16 out of 16 38500000 1418.00 20632
12 bangalore Sarjapur Road Gaurav Kumar GAURAV 3 Ground out of 5 7000000 1350.00 5655
13 bangalore Devanahalli Ancy 2 1 out of 7 8500000 1000.00 6071
14 bangalore Hanumantha Nagar Anoop 2 2 out of 4 9800000 1250.00 9800
15 bangalore geetha 2 2 out of 5 6000000 857.00 6367
16 bangalore Doddakannelli Omkar Omkar 2 3 out of 6 7800000 753.00 5158
17 bangalore Electronic City ruchika 2 1 out of 7 5700000 1186.00 6374
18 bangalore Sarjapur Road jeswanth 3 1 out of 4 10800000 1250.00 4553
19 bangalore Jigani vikas saxena 3 6 out of 12 5700000 770.00 4545
20 bangalore Ranju 2 9 out of 9 3500000 2150.00 7500
21 bangalore tezal 4 1 out of 4 18000000 1900.00 12500
22 bangalore Jaya Nagar Block 3 shravan 3 2 out of 4 27500000 1460.00 7705
23 bangalore whitefield Bhav 3 5 out of 5 11200000 1885.00 5305
24 bangalore whitefield Nagananda 3 2 out of 4 100000 1950.00 7436
25 bangalore Harlur MD Fuzail 3 1 out of 3 14500000 1250.00 5333
26 bangalore Sarjapur Road ashwanth 3 13 out of 27 8000000 1820.00 7418
27 bangalore Hoodi Partha Sarma 3 5 out of 13 13500000 2060.00 10194
28 bangalore Haralur Ma Shekar 3 3 out of 3 21000000 740.00 4650
29 bangalore Yelahanka Sashank Constructions 2 Ground out of 4 51200000 331.00 26471
30 mumbai Daulat Nagar K SH 1 8 out of 9 7300000 310.00 23276
31 mumbai Samel Pada NIYATI 1 1 out of 5 2200000 610.00 16667
32 mumbai Borivali West Sachin Kunder 2 5 out of 15 22500000 1160.00 NaN
33 mumbai Goregaon West Motashaw Motashaw 3 4 out of 7 27000000 665.00 23636
34 mumbai Vasai East MONCY BHASKAR 1 Ground out of 76 4100000 1052.00 37500
35 mumbai Worli Anam khan 3 4 out of 4 59900000 685.00 27136
36 mumbai Kurla East Milind Desai 2 Ground out of 3 12000000 400.00 18545
37 mumbai Andheri East Jas Baljeet J 1 1 out of 6 13000000 332.00 60000
38 mumbai Kandivali East Gangaram Dhuri 1 5 out of 20 6500000 776.00 5047
39 mumbai Mahim West Satyajit Satyajit 2 6 out of 12 37500000 800.00 NaN
40 mumbai Borivali West vasant ahir 2 7 out of 7 27000000 450.00 11529
41 mumbai Chembur West Rajendra 1 Ground out of 4 10200000 430.00 38459
42 mumbai Aditi Shah 1 1 out of 3 9500000 675.00 7242
43 mumbai Haroon mansuri 2 21 out of 21 6000000 1870.00 NaN
44 mumbai Dadar West kishor kishor 4 3 out of 3 1500000 820.00 10000
45 mumbai Boisar West Medha Naik 2 7 out of 21 3200000 850.00 14583
46 mumbai Mira Road Anita 2 5 out of 7 9800000 1172.00 5376
47 mumbai K jagdish Dassani 3 5 out of 7 48900000 404.00 12042
48 mumbai Virar West Raghav Sharma 1 4 out of 15 44900000 575.00 13669
49 mumbai Vasai West nandan lanjekar 2 9 out of 15 7000000 507.00 19375
In [ ]:
 

Filling the numerical missing values with mean or median of the column whichever is suitable based-on a serious analysis¶

In [556]:
np.round(dff.isna().sum()/len(dff)*100, 2).astype(str) + '%'
Out[556]:
City                0.0%
Area                0.0%
Owner              0.24%
BHK                 0.0%
Floor              0.71%
price_in_crores     0.0%
Carpet_Area         1.9%
per_sqt_rupees      6.9%
dtype: object
  • since no column missing values % is more than 50% so am going for filling the missing values with mean|median|mode based on the column type()

BHK Column¶

In [557]:
dff.BHK.isna().sum()
Out[557]:
0
In [558]:
dff.BHK = dff.BHK.astype(str)
In [559]:
sns.boxplot(data=dff.BHK);

Area Column¶

In [560]:
dff.Area.iloc[[47,88,102,120,122,144,196,205,280,289]]
# Replace these values with NaN's and then fill them with mode of the column
droplist1=[47,88,102,120,122,144,196,205,280,289]

dff.Area= dff.Area.drop(droplist1,axis=0)

dff.Area.iloc[102]
Out[560]:
47          K
88         Gu
102      Poth
120         H
122         S
144         H
196     Bill 
205     Bill 
280         K
289      Jepp
Name: Area, dtype: object
Out[560]:
nan
In [561]:
dff = dff.replace(r'^\s*$', np.nan, regex=True)
print(dff.head(20))
         City                   Area                 Owner BHK  \
0   bangalore         Kattigenahalli          Omkar Pandey   3   
1   bangalore          Ayodaya Nagar                 gokul   2   
2   bangalore       Electronic City           Thayumanavan   2   
3   bangalore           Tumkur Road               Prasanna   3   
4   bangalore            Kudlu Gate            Akash Akash   2   
5   bangalore            Hal Stage 2               av nath   3   
6   bangalore          Murugeshpalya       Sangeeta Pillai   2   
7   bangalore            Begur Road                ganga k   3   
8   bangalore        Judicial Layout               Anjan K   3   
9   bangalore              Bellandur           Omesh Saraf   2   
10  bangalore         Kasturi Nagar          Mohd Hussain    3   
11  bangalore   Koramangala Block 1             Rahul Jain   3   
12  bangalore         Sarjapur Road    Gaurav Kumar GAURAV   3   
13  bangalore           Devanahalli                   Ancy   2   
14  bangalore       Hanumantha Nagar                 Anoop   2   
15  bangalore                    NaN                geetha   2   
16  bangalore         Doddakannelli            Omkar Omkar   2   
17  bangalore       Electronic City                ruchika   2   
18  bangalore         Sarjapur Road               jeswanth   3   
19  bangalore                Jigani           vikas saxena   3   

                 Floor  price_in_crores  Carpet_Area per_sqt_rupees  
0      Ground out of 4          6000000      1000.00           4950  
1      Ground out of 4          8000000      1150.00           6957  
2           4 out of 4          4800000      1060.00           4528  
3         14 out of 15         13500000      1790.00           7542  
4           2 out of 4          6100000       803.00           5706  
5           1 out of 4         13000000      1640.00           7303  
6           3 out of 5          5000000      1250.00           5000  
7   Basement out of 17         18000000      1588.00          11335  
8          3 out of 14           100000      1000.00           8889  
9          2 out of 10         10200000      1465.00           6997  
10     Ground out of 7         14500000      1500.00           8146  
11        16 out of 16         38500000      1418.00          20632  
12     Ground out of 5          7000000      1350.00           5655  
13          1 out of 7          8500000      1000.00           6071  
14          2 out of 4          9800000      1250.00           9800  
15          2 out of 5          6000000       857.00           6367  
16          3 out of 6          7800000       753.00           5158  
17          1 out of 7          5700000      1186.00           6374  
18          1 out of 4         10800000      1250.00           4553  
19         6 out of 12          5700000       770.00           4545  
In [562]:
# filling Area column with mode of the column
dff.Area.value_counts()
dff.Area.mode()
dff.Area.mode().values
Ar_mode=dff.Area.mode().values[0]
dff.Area.fillna(Ar_mode, inplace=True)
dff.Area.isna().sum()
Out[562]:
 Alwar Bypass Road      22
 Greater Noida West      7
 Jwalapur                6
 Sikandra                5
 Dayal Bagh              5
                        ..
 Sector 2 Masibari       1
 Upper Chutia            1
 Hazaribag Road          1
 Lalpur                  1
 Sriramapura             1
Name: Area, Length: 261, dtype: int64
Out[562]:
0     Alwar Bypass Road 
Name: Area, dtype: object
Out[562]:
array([' Alwar Bypass Road '], dtype=object)
Out[562]:
0

Owner Column --- Catagorical¶

In [563]:
# filling Owner column with mode of the column
dff.Owner.value_counts()
dff.Owner.mode()
dff.Owner.mode().values
Ow_mode=dff.Owner.mode().values[0]
dff.Owner.fillna(Ow_mode, inplace=True)
dff.Owner.isna().sum()
Out[563]:
 Prasanna         2
 Sunil Kumar      2
 vijay            2
 Srikanth         2
 sanjeev          2
                 ..
 Mukesh Mukesh    1
 Manit Kumar      1
 Neeraj           1
 Abhishek         1
 Chakko           1
Name: Owner, Length: 414, dtype: int64
Out[563]:
0        Prasanna
1        Srikanth
2     Sunil Kumar
3         sanjeev
4           vijay
Name: Owner, dtype: object
Out[563]:
array([' Prasanna', ' Srikanth', ' Sunil Kumar', ' sanjeev', ' vijay'],
      dtype=object)
Out[563]:
0
In [564]:
dff[dff.Owner.isna()]
Out[564]:
City Area Owner BHK Floor price_in_crores Carpet_Area per_sqt_rupees

Floor Column --- Catagorical¶

In [565]:
# filling Floor column with mode of the column
dff.Floor.value_counts()
dff.Floor.mode()
dff.Floor.mode().values
FL_mode=dff.Floor.mode().values[0]
dff.Floor.fillna(FL_mode, inplace=True)
dff.Floor.isna().sum()
Out[565]:
3 out of 4          24
1 out of 4          22
2 out of 4          20
4 out of 4          16
2 out of 5          14
                    ..
17 out of 19         1
26 out of 27         1
12 out of 24         1
Ground out of 76     1
Ground out of 10     1
Name: Floor, Length: 145, dtype: int64
Out[565]:
0    3 out of 4
Name: Floor, dtype: object
Out[565]:
array(['3 out of 4'], dtype=object)
Out[565]:
0
In [566]:
dff[dff.Floor.isna()]
Out[566]:
City Area Owner BHK Floor price_in_crores Carpet_Area per_sqt_rupees
In [567]:
dff.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 420 entries, 0 to 419
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   City             420 non-null    object 
 1   Area             420 non-null    object 
 2   Owner            420 non-null    object 
 3   BHK              420 non-null    object 
 4   Floor            420 non-null    object 
 5   price_in_crores  420 non-null    int32  
 6   Carpet_Area      412 non-null    float64
 7   per_sqt_rupees   391 non-null    object 
dtypes: float64(1), int32(1), object(6)
memory usage: 24.7+ KB

per_sqt column --- Numerical¶

Check the skewness of the per_sqt column¶

In [568]:
dff.per_sqt_rupees.skew()
# As you can observe my per_sqt skewness in extremely high that represents that huge outliers are present.
# Or intensional put there.
Out[568]:
19.756402504713776

Replacing Some of the values which are wrong with NaN's and filling them with median¶

In [569]:
# creating the droplist
droplist=[30,31,49,34,37,41,51,334]
In [570]:
# droping the droplist
dff.per_sqt_rupees= dff.per_sqt_rupees.drop(droplist,axis=0)

dff.per_sqt_rupees.iloc[334]
Out[570]:
nan
In [571]:
dff.per_sqt_rupees=dff.per_sqt_rupees.astype(float)
In [572]:
# calculatin the median using pandas and filling using fillna()
per_median=dff.per_sqt_rupees.median()
dff.per_sqt_rupees.fillna(per_median, inplace=True)
In [573]:
# type casting
dff.per_sqt_rupees=dff.per_sqt_rupees.astype(int)
In [574]:
dff[dff.per_sqt_rupees.isna()]
Out[574]:
City Area Owner BHK Floor price_in_crores Carpet_Area per_sqt_rupees
In [575]:
dff.per_sqt_rupees.skew()
# As you can see that the skewness is significantly reduced
Out[575]:
3.503912518035357

Identifying Outliers¶

IQR Method¶

In [576]:
import scipy.stats as stats
In [577]:
Q1, Q2, Q3 = tuple(dff.per_sqt_rupees.quantile(q = [0.25,0.5,0.75]).values)

print(Q1, Q2, Q3)

IQR = Q3-Q1
UL = Q3 + 1.5*IQR
LL = Q1-1.5*IQR

print(IQR, UL, LL)
3295.0 4274.0 5481.25
2186.25 8760.625 15.625
In [578]:
dff[(dff.per_sqt_rupees > UL) | (dff.per_sqt_rupees < LL)]
Out[578]:
City Area Owner BHK Floor price_in_crores Carpet_Area per_sqt_rupees
7 bangalore Begur Road ganga k 3 Basement out of 17 18000000 1588.00 11335
8 bangalore Judicial Layout Anjan K 3 3 out of 14 100000 1000.00 8889
11 bangalore Koramangala Block 1 Rahul Jain 3 16 out of 16 38500000 1418.00 20632
14 bangalore Hanumantha Nagar Anoop 2 2 out of 4 9800000 1250.00 9800
21 bangalore Alwar Bypass Road tezal 4 1 out of 4 18000000 1900.00 12500
27 bangalore Hoodi Partha Sarma 3 5 out of 13 13500000 2060.00 10194
29 bangalore Yelahanka Sashank Constructions 2 Ground out of 4 51200000 331.00 26471
33 mumbai Goregaon West Motashaw Motashaw 3 4 out of 7 27000000 665.00 23636
35 mumbai Worli Anam khan 3 4 out of 4 59900000 685.00 27136
36 mumbai Kurla East Milind Desai 2 Ground out of 3 12000000 400.00 18545
40 mumbai Borivali West vasant ahir 2 7 out of 7 27000000 450.00 11529
44 mumbai Dadar West kishor kishor 4 3 out of 3 1500000 820.00 10000
45 mumbai Boisar West Medha Naik 2 7 out of 21 3200000 850.00 14583
47 mumbai Alwar Bypass Road jagdish Dassani 3 5 out of 7 48900000 404.00 12042
48 mumbai Virar West Raghav Sharma 1 4 out of 15 44900000 575.00 13669
50 mumbai Bhandup West amit chalke 1 17 out of 18 10500000 558.00 21333
52 mumbai Goregaon East Ritu 2 7 out of 7 12500000 1130.00 22951
53 mumbai Marol Maroshi Road Khuzema Tajir 3 15 out of 23 19000000 562.00 10695
54 mumbai Malad West Dipti solanki 2 3 out of 4 15500000 535.00 25316
59 mumbai Mulund West Savith Raghavan Savith Raghavan 2 1 out of 2 16000000 1044.00 8947
69 chennai Iyyappanthangal Chennai vinothan 2 1 out of 2 12500000 700.00 9140
73 chennai Velachery Chennai Kads 2 2 out of 4 8500000 464.00 11616
82 chennai AGS Colony Velachery Chennai R Premalatha 1 1 out of 2 3500000 1194.00 11818
223 greater-noida Zeta 1 Shiv Kumar 4 3 out of 6 9800000 2200.00 9001
240 gurgaon Jal Vayu Vihar Mohit 2 22 out of 25 8000000 900.00 8889
241 gurgaon Sohna Sector 32 Nithin Abraham 3 3 out of 3 18000000 1789.00 10061
248 gurgaon Sector 112 Y S Dwivedi 3 9 out of 15 46000000 3763.00 12224
249 gurgaon Sector 102 Rahul Arora 4 3 out of 14 26000000 1800.00 9630
253 gurgaon DLF City Phase 1 sanjay kaushik 4 4 out of 4 18500000 1556.00 11889
258 gurgaon Sector 47 Pradeep Sharma 3 1 out of 4 35000000 2470.00 14170
261 gurgaon DLF City Phase 1 Jk Batra 2 1 out of 18 13200000 1250.00 9429
263 gurgaon South City 2 Rishabh Arora 4 4 out of 9 23500000 1800.00 11190
265 gurgaon Sector 54 VIVEK SHARMA 3 City Plot Phase 1 109000000 3650.00 25952
268 gurgaon Sector 65 Ankush 3 2 out of 4 22800000 1500.00 12473
413 mysore Hebbal 2nd Stage Rathan 2 7 out of 7 16300000 1100.00 14818

Z-score method¶

In [579]:
# Suppress Scientific Notation in Numpy
np.set_printoptions(suppress=True)

# Suppress Scientific Notation in Pandas
pd.set_option('display.float_format', '{:.2f}'.format)
In [580]:
zscore_array = stats.zscore(dff.per_sqt_rupees)
In [581]:
np.where(zscore_array>2,1,0).sum()
np.where(zscore_array<-2,1,0).sum()
Out[581]:
15
Out[581]:
0
In [582]:
dff[zscore_array>2]
Out[582]:
City Area Owner BHK Floor price_in_crores Carpet_Area per_sqt_rupees
11 bangalore Koramangala Block 1 Rahul Jain 3 16 out of 16 38500000 1418.00 20632
21 bangalore Alwar Bypass Road tezal 4 1 out of 4 18000000 1900.00 12500
29 bangalore Yelahanka Sashank Constructions 2 Ground out of 4 51200000 331.00 26471
33 mumbai Goregaon West Motashaw Motashaw 3 4 out of 7 27000000 665.00 23636
35 mumbai Worli Anam khan 3 4 out of 4 59900000 685.00 27136
36 mumbai Kurla East Milind Desai 2 Ground out of 3 12000000 400.00 18545
45 mumbai Boisar West Medha Naik 2 7 out of 21 3200000 850.00 14583
48 mumbai Virar West Raghav Sharma 1 4 out of 15 44900000 575.00 13669
50 mumbai Bhandup West amit chalke 1 17 out of 18 10500000 558.00 21333
52 mumbai Goregaon East Ritu 2 7 out of 7 12500000 1130.00 22951
54 mumbai Malad West Dipti solanki 2 3 out of 4 15500000 535.00 25316
258 gurgaon Sector 47 Pradeep Sharma 3 1 out of 4 35000000 2470.00 14170
265 gurgaon Sector 54 VIVEK SHARMA 3 City Plot Phase 1 109000000 3650.00 25952
268 gurgaon Sector 65 Ankush 3 2 out of 4 22800000 1500.00 12473
413 mysore Hebbal 2nd Stage Rathan 2 7 out of 7 16300000 1100.00 14818

Applying Transformation Techniques¶

In [583]:
# Extreme values/Outliers cause variance

dff.per_sqt_rupees.var()
dff.per_sqt_rupees.std()
Out[583]:
12627149.27491759
Out[583]:
3553.4700329280377
In [584]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
In [585]:
# Normalization
mm1 = MinMaxScaler()
In [586]:
dff1 = mm1.fit_transform(dff.per_sqt_rupees.values.reshape(-1,1))
In [587]:
dff1.min()
dff1.max()
Out[587]:
0.0
Out[587]:
1.0
In [588]:
sns.histplot(dff1, kde = True);
In [589]:
sns.boxplot(dff1);
In [590]:
# Standardization

ss1 = StandardScaler()
In [591]:
dff2 = ss1.fit_transform(dff.per_sqt_rupees.values.reshape(-1,1))
In [592]:
round(dff2.mean())
round(dff2.std())
Out[592]:
0
Out[592]:
1
In [593]:
sns.histplot(dff2, kde = True);
In [594]:
sns.boxplot(dff2);
In [595]:
np.log(dff.per_sqt_rupees)
Out[595]:
0     8.51
1     8.85
2     8.42
3     8.93
4     8.65
      ... 
415   8.76
416   8.52
417   8.36
418   8.80
419   8.77
Name: per_sqt_rupees, Length: 420, dtype: float64
In [596]:
sns.histplot(np.log(dff.per_sqt_rupees), kde = True);
In [597]:
sns.boxplot(np.log(dff.per_sqt_rupees));
In [598]:
# Log transformation seems to be fitting well for this data

# compare the variance
dff.per_sqt_rupees.var()

np.log(dff.per_sqt_rupees).var()
Out[598]:
12627149.27491759
Out[598]:
0.23814150733330514
In [599]:
dff.per_sqt_rupees**(1/3)
Out[599]:
0     17.04
1     19.09
2     16.54
3     19.61
4     17.87
       ... 
415   18.56
416   17.10
417   16.23
418   18.82
419   18.62
Name: per_sqt_rupees, Length: 420, dtype: float64
In [600]:
sns.boxplot(dff.per_sqt_rupees**(1/3));
In [601]:
stats.boxcox(dff.per_sqt_rupees)

len(stats.boxcox(dff.per_sqt_rupees))

type(stats.boxcox(dff.per_sqt_rupees))

x,y = stats.boxcox(dff.per_sqt_rupees)
Out[601]:
(array([2.11195827, 2.1180167 , 2.11020754, 2.11931871, 2.11460509,
        2.11880526, 2.11215123, 2.12519303, 2.12182256, 2.11811077,
        2.12051637, 2.13204143, 2.11444302, 2.11570617, 2.12322127,
        2.11653027, 2.11274285, 2.11654908, 2.11031783, 2.11028263,
        2.11923022, 2.12644576, 2.11965543, 2.11326993, 2.11909397,
        2.11336791, 2.11905534, 2.12376861, 2.11073751, 2.13437529,
        2.109034  , 2.109034  , 2.109034  , 2.13334783, 2.109034  ,
        2.13459321, 2.13095717, 2.109034  , 2.11233005, 2.109034  ,
        2.12541444, 2.109034  , 2.11867025, 2.109034  , 2.12350305,
        2.12830742, 2.11351691, 2.12597449, 2.12754181, 2.109034  ,
        2.13237023, 2.109034  , 2.13307212, 2.12442144, 2.13397708,
        2.1131393 , 2.1214341 , 2.11215123, 2.10983033, 2.12191778,
        2.109034  , 2.1117427 , 2.11582094, 2.1131393 , 2.11462402,
        2.12092258, 2.12099264, 2.109034  , 2.11247659, 2.12222823,
        2.11582379, 2.12010539, 2.10932499, 2.12551196, 2.10796302,
        2.10764248, 2.11940649, 2.11323123, 2.11683285, 2.11606681,
        2.10936271, 2.11706628, 2.12573432, 2.11307169, 2.11754896,
        2.12023905, 2.10295901, 2.11268783, 2.11058757, 2.10764779,
        2.11080974, 2.10832227, 2.10810265, 2.11951005, 2.10547099,
        2.10147698, 2.11923022, 2.11215123, 2.10110878, 2.10576335,
        2.10625589, 2.10794222, 2.10343587, 2.11302164, 2.10826132,
        2.10301671, 2.11565705, 2.11279764, 2.10874669, 2.11118171,
        2.10596111, 2.09751594, 2.11080974, 2.10878109, 2.11832878,
        2.109034  , 2.109034  , 2.109034  , 2.109034  , 2.109034  ,
        2.11075879, 2.10867271, 2.11333999, 2.11392701, 2.10962855,
        2.10614456, 2.10639534, 2.11022082, 2.10923975, 2.10615631,
        2.10585356, 2.109034  , 2.10025353, 2.10757324, 2.11385679,
        2.11292096, 2.11333999, 2.09859975, 2.11064774, 2.10822054,
        2.10785347, 2.109034  , 2.11412221, 2.10438151, 2.10677224,
        2.109034  , 2.11001124, 2.1127867 , 2.11136186, 2.12086111,
        2.09671989, 2.11021197, 2.10028757, 2.11315703, 2.09290595,
        2.11369819, 2.10220465, 2.10078182, 2.10461086, 2.10240779,
        2.09756577, 2.10744993, 2.10557518, 2.10472105, 2.10747145,
        2.11022082, 2.10199894, 2.10680603, 2.09990929, 2.09213461,
        2.10913486, 2.10209069, 2.10287208, 2.109034  , 2.10588352,
        2.10576335, 2.09445579, 2.10605607, 2.10668174, 2.10055745,
        2.10072394, 2.11336791, 2.09588694, 2.10696828, 2.10062424,
        2.10327378, 2.09103735, 2.10086417, 2.10846348, 2.09159365,
        2.09692741, 2.109034  , 2.109034  , 2.10251956, 2.10585956,
        2.10335856, 2.08525251, 2.09702001, 2.09956679, 2.109034  ,
        2.09267388, 2.10664198, 2.11412221, 2.10277006, 2.109034  ,
        2.09670945, 2.10764779, 2.10110878, 2.09695833, 2.11058757,
        2.10742299, 2.10327378, 2.109034  , 2.10607381, 2.09621186,
        2.11347205, 2.109034  , 2.10882029, 2.10812323, 2.11138218,
        2.10586555, 2.11696829, 2.11699158, 2.12200562, 2.10717273,
        2.11128027, 2.11079278, 2.11300014, 2.11332951, 2.11328047,
        2.11065203, 2.11222762, 2.09572229, 2.11094886, 2.09505992,
        2.10734185, 2.10735811, 2.10940033, 2.10078182, 2.10557518,
        2.12182256, 2.12358736, 2.109034  , 2.11323123, 2.10690695,
        2.10980755, 2.11215123, 2.11173874, 2.12616486, 2.12297505,
        2.12023905, 2.11268783, 2.11852906, 2.12581117, 2.12049573,
        2.12064121, 2.10972522, 2.11468377, 2.12797043, 2.11689035,
        2.11923022, 2.12267558, 2.11837462, 2.1250239 , 2.10615631,
        2.13419956, 2.10858342, 2.11962895, 2.12641869, 2.109034  ,
        2.10325961, 2.11654102, 2.10749294, 2.11030024, 2.10463686,
        2.109034  , 2.11514851, 2.109034  , 2.11084783, 2.10166587,
        2.10009949, 2.109034  , 2.11058757, 2.11731016, 2.11358562,
        2.11022967, 2.11141462, 2.11654102, 2.11688252, 2.10647609,
        2.11523577, 2.10476619, 2.10265266, 2.11439173, 2.11524176,
        2.11019424, 2.11652758, 2.10625589, 2.109034  , 2.10466281,
        2.09728488, 2.11202806, 2.10423559, 2.11604441, 2.09456176,
        2.10387154, 2.10384424, 2.10360326, 2.10764779, 2.08212959,
        2.09854336, 2.10189124, 2.09915379, 2.10110878, 2.10450644,
        2.0968861 , 2.10038928, 2.08798098, 2.1191857 , 2.10756256,
        2.10196824, 2.09456176, 2.10222735, 2.10535369, 2.09920822,
        2.10535369, 2.09993535, 2.09455002, 2.09391674, 2.09903524,
        2.09969934, 2.09898026, 2.11373549, 2.1018758 , 2.109034  ,
        2.11604441, 2.10528537, 2.09628684, 2.10764779, 2.10110878,
        2.11092788, 2.10406137, 2.09660472, 2.10240779, 2.09648881,
        2.10524173, 2.10763185, 2.09706102, 2.10265266, 2.10378262,
        2.09778339, 2.10542171, 2.10356854, 2.10146906, 2.09380705,
        2.10622667, 2.11035294, 2.09743591, 2.10734727, 2.1154997 ,
        2.1080511 , 2.08788233, 2.08939143, 2.09258287, 2.10042305,
        2.0919335 , 2.093561  , 2.09137302, 2.08616579, 2.09736559,
        2.09738571, 2.1088789 , 2.08778321, 2.08125326, 2.09532097,
        2.0836058 , 2.0887537 , 2.09480672, 2.09931658, 2.08132016,
        2.09314791, 2.0953997 , 2.08895807, 2.09502559, 2.09817231,
        2.10602646, 2.1032667 , 2.08741567, 2.09990929, 2.10652777,
        2.10477907, 2.10822054, 2.11934805, 2.11716853, 2.109034  ,
        2.10913964, 2.11507278, 2.11191934, 2.11120642, 2.11570617,
        2.11258449, 2.10871222, 2.11710473, 2.10951258, 2.10690136,
        2.10764779, 2.109034  , 2.109034  , 2.10991203, 2.11215123,
        2.11253995, 2.11711752, 2.11472136, 2.12849299, 2.11155565,
        2.11658928, 2.11215123, 2.109034  , 2.11731016, 2.1167698 ]),
 -0.4643823843215321)
Out[601]:
2
Out[601]:
tuple
In [602]:
sns.boxplot(x);

Carpet_Area Column¶

In [603]:
drop_list = [239,259]
dff.Carpet_Area=dff.Carpet_Area.drop(drop_list, axis=0)
In [604]:
Car_median=dff.Carpet_Area.median()
dff.Carpet_Area.fillna(Car_median, inplace=True)
dff.Carpet_Area.isna().sum()
Out[604]:
0
In [605]:
dff[dff.Carpet_Area.isna()]
Out[605]:
City Area Owner BHK Floor price_in_crores Carpet_Area per_sqt_rupees

Identifying Outliers¶

IQR Method¶

In [606]:
Q1, Q2, Q3 = tuple(dff.Carpet_Area.quantile(q = [0.25,0.5,0.75]).values)
In [607]:
print(Q1, Q2, Q3)
812.0 1052.5 1400.0
In [608]:
IQR = Q3-Q1
UL = Q3 + 1.5*IQR
LL = Q1-1.5*IQR

print(IQR, UL, LL)
588.0 2282.0 -70.0
In [609]:
dff[(dff.Carpet_Area > UL) | (dff.Carpet_Area < LL)]
Out[609]:
City Area Owner BHK Floor price_in_crores Carpet_Area per_sqt_rupees
154 haridwar Laksar Road Shakti Verma 1 1 out of 4 14300000 3000.00 2192
200 vadodara Danteshwar Pushpita Roy Choudhury 2 4 out of 9 2500000 2358.00 2174
202 vadodara Gotri Sevasi Road Deepak Singh 3 9 out of 9 5000000 2350.00 5556
208 vadodara Alwar Bypass Road Bindi shah 3 2 out of 2 4200000 2600.00 2545
216 greater-noida Greater Noida West Kamal Bansal 4 18 out of 32 100000 2364.00 4274
221 greater-noida Chi 5 AN SINHA 4 11 out of 22 21000000 3210.00 6533
222 greater-noida Jaypee Greens Nihit 4 6 out of 34 33100000 3441.00 6542
248 gurgaon Sector 112 Y S Dwivedi 3 9 out of 15 46000000 3763.00 12224
258 gurgaon Sector 47 Pradeep Sharma 3 1 out of 4 35000000 2470.00 14170
265 gurgaon Sector 54 VIVEK SHARMA 3 City Plot Phase 1 109000000 3650.00 25952
269 gurgaon Sector 65 Manjunath S 3 Ground out of 32 500000 3069.00 4274
276 mangalore Bajpe Gopal 4 4 out of 8 200000 3030.00 5882
282 mangalore Falnir Santosh Babu Salian 3 2 out of 4 18000000 3900.00 4615
346 agra Sikandra SHWETANG SHARMA 5 3 out of 5 81900000 2410.00 3997

Z-score Method¶

In [610]:
# Suppress Scientific Notation in Numpy
np.set_printoptions(suppress=True)

# Suppress Scientific Notation in Pandas
pd.set_option('display.float_format', '{:.2f}'.format)
In [611]:
zscore_array = stats.zscore(dff.Carpet_Area)
In [612]:
np.where(zscore_array>2,1,0).sum()
np.where(zscore_array<-2,1,0).sum()
Out[612]:
15
Out[612]:
2
In [613]:
zscore_array>2
Out[613]:
0      False
1      False
2      False
3      False
4      False
       ...  
415    False
416    False
417    False
418    False
419    False
Name: Carpet_Area, Length: 420, dtype: bool
In [614]:
dff[zscore_array>2]
Out[614]:
City Area Owner BHK Floor price_in_crores Carpet_Area per_sqt_rupees
142 ranchi Alwar Bypass Road Avishek Modi Modi 4 3 out of 4 12500000 2250.00 5556
154 haridwar Laksar Road Shakti Verma 1 1 out of 4 14300000 3000.00 2192
200 vadodara Danteshwar Pushpita Roy Choudhury 2 4 out of 9 2500000 2358.00 2174
202 vadodara Gotri Sevasi Road Deepak Singh 3 9 out of 9 5000000 2350.00 5556
208 vadodara Alwar Bypass Road Bindi shah 3 2 out of 2 4200000 2600.00 2545
216 greater-noida Greater Noida West Kamal Bansal 4 18 out of 32 100000 2364.00 4274
221 greater-noida Chi 5 AN SINHA 4 11 out of 22 21000000 3210.00 6533
222 greater-noida Jaypee Greens Nihit 4 6 out of 34 33100000 3441.00 6542
248 gurgaon Sector 112 Y S Dwivedi 3 9 out of 15 46000000 3763.00 12224
258 gurgaon Sector 47 Pradeep Sharma 3 1 out of 4 35000000 2470.00 14170
265 gurgaon Sector 54 VIVEK SHARMA 3 City Plot Phase 1 109000000 3650.00 25952
269 gurgaon Sector 65 Manjunath S 3 Ground out of 32 500000 3069.00 4274
276 mangalore Bajpe Gopal 4 4 out of 8 200000 3030.00 5882
282 mangalore Falnir Santosh Babu Salian 3 2 out of 4 18000000 3900.00 4615
346 agra Sikandra SHWETANG SHARMA 5 3 out of 5 81900000 2410.00 3997

Applying Transformation Techniques¶

In [615]:
dff.Carpet_Area.skew()
Out[615]:
1.672304347952084
In [616]:
sns.boxplot(data=dff.Carpet_Area);
In [617]:
# Extreme values/Outliers cause variance

dff.Carpet_Area.var()
dff.Carpet_Area.std()
Out[617]:
281486.8008523696
Out[617]:
530.5532969008576
In [618]:
mm = MinMaxScaler()
In [619]:
dff_ = mm.fit_transform(dff.Carpet_Area.values.reshape(-1,1))
In [620]:
dff_.min()
dff_.max()
Out[620]:
0.0
Out[620]:
1.0
In [621]:
sns.histplot(dff_, kde = True);
In [622]:
sns.boxplot(dff_);
In [623]:
# Standardization

ss = StandardScaler()

dff_s = ss.fit_transform(dff.Carpet_Area.values.reshape(-1,1))

round(dff_s.mean())
dff_s.std()

sns.histplot(dff_s, kde = True);
In [624]:
sns.boxplot(dff_s);
In [625]:
# log transformation method
np.log(dff.Carpet_Area)
Out[625]:
0     6.91
1     7.05
2     6.97
3     7.49
4     6.69
      ... 
415   6.98
416   6.98
417   7.09
418   6.80
419   7.02
Name: Carpet_Area, Length: 420, dtype: float64
In [626]:
sns.histplot(np.log(dff.Carpet_Area), kde = True);
In [627]:
sns.boxplot(np.log(dff.Carpet_Area));
In [628]:
# cube root transformation  method
dff.Carpet_Area**(1/3)
Out[628]:
0     10.00
1     10.48
2     10.20
3     12.14
4      9.29
       ... 
415   10.23
416   10.26
417   10.63
418    9.65
419   10.37
Name: Carpet_Area, Length: 420, dtype: float64
In [629]:
sns.boxplot(dff.Carpet_Area**(1/3));
In [630]:
(dff.Carpet_Area**(1/3)).skew()
dff.Carpet_Area.skew()
Out[630]:
-0.04396947259226398
Out[630]:
1.672304347952084
In [631]:
import scipy.stats as stats
In [632]:
stats.boxcox(dff.Carpet_Area)

len(stats.boxcox(dff.Carpet_Area))

type(stats.boxcox(dff.Carpet_Area))

x,y = stats.boxcox(dff.Carpet_Area)
Out[632]:
(array([36.60342465, 38.83608626, 37.51921819, 46.7795446 , 33.33918254,
        45.09564664, 40.22831393, 44.49029921, 36.60342465, 43.00915977,
        43.43801727, 42.42343321, 41.55489717, 36.60342465, 40.22831393,
        34.27800037, 32.43533305, 39.34539935, 40.22831393, 32.7465937 ,
        50.50075042, 47.96099206, 42.94739202, 47.80235806, 48.48438448,
        40.22831393, 47.10600059, 49.60803085, 32.19443999, 22.70129238,
        22.0533256 , 29.63094782, 38.97851499, 30.75218253, 37.39895004,
        31.14598915, 24.6706847 , 22.73152132, 32.85546022, 33.28592469,
        25.97259129, 25.46284768, 30.94996522, 47.64296116, 33.63873449,
        34.15835321, 39.14845575, 24.77833042, 28.88535848, 27.35439424,
        28.51332272, 31.04819443, 38.54897227, 28.6014687 , 27.99893593,
        29.20815607, 33.14331244, 35.57311056, 30.086475  , 37.27812921,
        33.28592469, 39.54094628, 41.55489717, 35.81461641, 36.61895551,
        35.00036151, 33.28592469, 34.19259854, 43.36493058, 31.4368166 ,
        30.12740941, 42.13253116, 35.49208998, 26.3213334 , 28.9935158 ,
        27.40113425, 36.4475984 , 25.84645722, 26.44434466, 32.89163623,
        34.15835321, 28.1118766 , 39.45730979, 34.96723052, 31.4368166 ,
        29.42061407, 32.60063978, 37.1415367 , 33.28592469, 40.89929792,
        33.69120681, 45.77991497, 44.33710093, 31.4368166 , 43.92081655,
        38.83608626, 41.03161834, 36.60342465, 35.2474408 , 40.89929792,
        29.42061407, 36.60342465, 40.89929792, 43.63205749, 46.88872727,
        38.11249792, 44.86420952, 44.63104332, 36.13307467, 38.59223361,
        35.41080635, 47.69617883, 37.36879691, 45.09564664, 33.98639365,
        33.28592469, 35.00036151, 30.45211719, 33.28592469, 37.40648292,
        38.25877797, 43.43801727, 38.83608626, 28.84193681, 44.15938336,
        49.10188439, 35.49208998, 40.63280415, 40.22831393, 43.43801727,
        42.00515007, 39.70746606, 36.60342465, 33.28592469, 48.22371281,
        40.9787635 , 43.92081655, 39.26116752, 36.14889322, 35.81461641,
        38.31706749, 37.41401364, 51.46662099, 35.81461641, 38.83608626,
        45.21072735, 41.39890662, 46.44977306, 36.10140804, 43.43801727,
        43.31610776, 32.37996627, 38.11249792, 30.45211719, 57.99999124,
        39.67978201, 31.79985456, 42.82347282, 43.24272515, 37.74325448,
        42.04342245, 36.98892473, 34.83425981, 37.36879691, 27.18992309,
        27.88535708, 30.45211719, 31.4368166 , 30.45211719, 44.63104332,
        33.28592469, 34.44790722, 42.82347282, 35.00036151, 46.88872727,
        29.50499932, 35.2474408 , 45.77991497, 32.37996627, 49.32551329,
        43.43801727, 35.00036151, 32.92775622, 39.65207036, 31.4368166 ,
        32.47217039, 43.61996625, 47.96099206, 43.00915977, 37.8175141 ,
        25.97259129, 36.75831424, 34.15835321, 36.85080459, 36.98892473,
        33.28592469, 36.29082027, 33.28592469, 30.24974754, 38.83608626,
        52.48111879, 35.00036151, 52.40694376, 32.37996627, 44.39611324,
        18.12845607, 29.42061407, 45.21072735, 54.65683745, 37.40648292,
        39.54094628, 38.40426565, 36.52562955, 36.13307467, 34.91745052,
        46.61507887, 52.53665056, 30.6526184 , 37.40648292, 35.81461641,
        36.52562955, 59.64802508, 61.38742663, 50.98699463, 35.16535421,
        33.98639365, 43.80083235, 34.36310234, 37.444115  , 29.73533555,
        38.90739356, 35.81461641, 33.98639365, 34.49864868, 42.13253116,
        38.33162015, 40.69966161, 43.36493058, 38.33162015, 37.40648292,
        35.00036151, 46.76860612, 43.43801727, 32.37996627, 22.0533256 ,
        46.3390968 , 28.77663264, 45.29103387, 63.6983569 , 46.88872727,
        40.89929792, 35.00036151, 48.48438448, 44.11181816, 39.7765559 ,
        37.40648292, 29.63094782, 46.28361566, 53.50401875, 37.40648292,
        42.82347282, 40.22831393, 44.04033194, 46.88872727, 28.33588099,
        62.90146809, 35.97434553, 43.43801727, 43.43801727, 58.5489484 ,
        30.85129634, 38.11249792, 45.77991497, 40.56578906, 41.29444382,
        35.09948895, 58.23959371, 37.40648292, 42.06890968, 40.36378797,
        30.6526184 , 39.06361718, 64.64538183, 36.2120694 , 42.06890968,
        37.66878634, 47.64296116, 37.21750943, 36.60342465, 41.16333119,
        32.98183167, 11.24291181, 36.60342465, 41.29444382, 41.63257905,
        34.73405816, 44.78279979, 38.11249792, 41.43798308, 39.54094628,
        36.13307467, 39.54094628, 48.84596197, 33.98639365, 40.22831393,
        35.70220197, 33.26814532, 35.00036151, 37.36879691, 26.88075378,
        31.4368166 , 33.55102281, 27.30754293, 34.43096987, 31.83773104,
        39.54094628, 37.06534237, 43.43801727, 46.9431815 , 39.95539237,
        36.05383406, 40.22831393, 41.3858694 , 35.97434553, 39.54094628,
        36.60342465, 43.43801727, 34.32909736, 28.33588099, 35.09948895,
        35.00036151, 43.92081655, 40.22831393, 34.15835321, 37.40648292,
        37.36879691, 34.12405926, 41.55489717, 37.36879691, 42.19601531,
        37.40648292, 39.42937466, 38.33162015, 36.60342465, 30.6526184 ,
        45.49660166, 52.95959843, 37.40648292, 34.36310234, 40.22831393,
        42.19601531, 46.88872727, 46.3390968 , 45.49660166, 35.97434553,
        35.65387035, 42.51140332, 35.42708423, 34.3461058 , 43.43801727,
        39.04945184, 28.44696293, 36.29082027, 26.37063334, 40.01018955,
        32.37996627, 32.92775622, 30.45211719, 30.04546242, 29.42061407,
        42.57407973, 30.6526184 , 33.55102281, 40.22831393, 39.12020538,
        38.27336221, 36.60342465, 38.83608626, 33.79580655, 31.4368166 ,
        35.81461641, 40.22831393, 38.83608626, 29.3994648 , 32.37996627,
        44.51380098, 43.70450415, 27.18992309, 30.04546242, 33.10752382,
        45.21072735, 43.27943877, 38.03905714,  3.50779096, 40.63280415,
        24.6706847 , 42.19601531, 39.17667707, 30.45211719, 37.36879691,
        34.83425981, 48.999747  , 35.18179324, 40.63280415, 33.28592469,
        38.03905714, 44.07609587, 47.69617883, 45.56482811, 42.19601531,
        36.8969263 , 50.98699463, 41.94125154, 38.11249792, 45.21072735,
        37.66878634, 37.8175141 , 39.54094628, 35.00036151, 38.33162015]),
 0.3971088052992405)
Out[632]:
2
Out[632]:
tuple
In [633]:
sns.boxplot(x);
In [634]:
dff.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 420 entries, 0 to 419
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   City             420 non-null    object 
 1   Area             420 non-null    object 
 2   Owner            420 non-null    object 
 3   BHK              420 non-null    object 
 4   Floor            420 non-null    object 
 5   price_in_crores  420 non-null    int32  
 6   Carpet_Area      420 non-null    float64
 7   per_sqt_rupees   420 non-null    int32  
dtypes: float64(1), int32(2), object(5)
memory usage: 23.1+ KB

price_in_crores column --- numerical¶

In [635]:
# finding the skweness of the column
dff.price_in_crores.skew()
Out[635]:
2.7504799201818098
In [636]:
dff.price_in_crores=dff.price_in_crores.astype(int)

Identifying Outliers¶

IQR Method¶

In [637]:
Q1, Q2, Q3 = tuple(dff.price_in_crores.quantile(q = [0.25,0.5,0.75]).values)


print(Q1, Q2, Q3)

IQR = Q3-Q1
UL = Q3 + 1.5*IQR
LL = Q1-1.5*IQR

print(IQR, UL, LL)
3775000.0 6000000.0 12000000.0
8225000.0 24337500.0 -8562500.0
In [638]:
dff[(dff.price_in_crores > UL) | (dff.price_in_crores < LL)]
Out[638]:
City Area Owner BHK Floor price_in_crores Carpet_Area per_sqt_rupees
11 bangalore Koramangala Block 1 Rahul Jain 3 16 out of 16 38500000 1418.00 20632
22 bangalore Jaya Nagar Block 3 shravan 3 2 out of 4 27500000 1460.00 7705
29 bangalore Yelahanka Sashank Constructions 2 Ground out of 4 51200000 331.00 26471
33 mumbai Goregaon West Motashaw Motashaw 3 4 out of 7 27000000 665.00 23636
35 mumbai Worli Anam khan 3 4 out of 4 59900000 685.00 27136
39 mumbai Mahim West Satyajit Satyajit 2 6 out of 12 37500000 800.00 4274
40 mumbai Borivali West vasant ahir 2 7 out of 7 27000000 450.00 11529
47 mumbai Alwar Bypass Road jagdish Dassani 3 5 out of 7 48900000 404.00 12042
48 mumbai Virar West Raghav Sharma 1 4 out of 15 44900000 575.00 13669
57 mumbai Andheri East DHRUV SHARMA 2 1 out of 11 25200000 935.00 5000
79 chennai Chennai sakthi 2 1 out of 3 29700000 778.00 6198
89 chennai Chennai PURUSHOTHAMAN 2 10 out of 16 74900000 1300.00 4000
90 visakhapatnam Alwar Bypass Road Hari 3 15 out of 16 47500000 823.00 4667
95 visakhapatnam Madhurwada Lakshman 3 1 out of 5 63600000 1150.00 3046
100 visakhapatnam Alwar Bypass Road Ravindra Pamidi 3 2 out of 5 97500000 600.00 3750
110 visakhapatnam Kurmannapalem Jaswanth 2 2 out of 3 43600000 925.00 3700
115 visakhapatnam Tagarapuvalsa Vision Properties 3 3 out of 5 38900000 800.00 4274
121 ranchi Alwar Bypass Road Anuj Krchandra 3 5 out of 6 76400000 1500.00 4200
123 ranchi Ratu Road Abhishek Gupta 1 2 out of 4 31500000 573.00 5497
124 ranchi Harihar S Shree Ram Developers 3 8 out of 12 68600000 1560.00 4400
158 haridwar Patanjali tarun agarwal 2 6 out of 6 50500000 1484.00 3483
210 greater-noida Zeta 1 RAHUL TRIVEDI 2 17 out of 22 47500000 1200.00 3958
215 greater-noida Omicron PRASAD 3 9 out of 14 65400000 1775.00 5363
222 greater-noida Jaypee Greens Nihit 4 6 out of 34 33100000 3441.00 6542
224 greater-noida Greater Noida West Right Value Solutions 2 5 out of 15 43500000 910.00 3912
225 greater-noida Omicron 1 Adhunik Propmart 2 11 out of 20 48500000 840.00 4780
227 greater-noida Sector 1 Greater Noida West Investors Lab 2 19 out of 20 61200000 862.00 5229
229 greater-noida Greater Noida West piyush nair 2 26 out of 27 50500000 615.00 5308
231 greater-noida Alwar Bypass Road Bricksnwall Innovations Pvt.Ltd. 2 1 out of 4 25500000 950.00 5020
232 greater-noida Noida Extention Pragati 3 10 out of 19 68900000 840.00 2429
247 gurgaon Sector 78 Pushpendra Sethi 3 19 out of 20 81100000 1657.00 4894
248 gurgaon Sector 112 Y S Dwivedi 3 9 out of 15 46000000 3763.00 12224
249 gurgaon Sector 102 Rahul Arora 4 3 out of 14 26000000 1800.00 9630
258 gurgaon Sector 47 Pradeep Sharma 3 1 out of 4 35000000 2470.00 14170
265 gurgaon Sector 54 VIVEK SHARMA 3 City Plot Phase 1 109000000 3650.00 25952
272 mangalore Bunts Hostel Road Jeanson Veigas 3 19 out of 47 69500000 1700.00 3971
274 mangalore Kulshekar Naveen Kamath 3 2 out of 2 54800000 1330.00 3487
285 mangalore Padil jaysen 3 5 out of 6 48500000 1070.00 4533
298 mangalore Nandigudda SNEHA 3 3 out of 5 68500000 1341.00 4274
304 raipur Santoshi Nagar Deekaha 3 6 out of 6 29100000 1250.00 2327
305 raipur Naya Raipur Divya Singh 2 4 out of 6 31800000 943.00 3372
306 raipur Hirapur Road Arpit Jain 3 5 out of 5 32500000 799.00 3368
311 raipur Bhatagaon vijay 2 1 out of 4 31500000 815.00 3099
317 raipur Deopuri sandip Agarwal 3 7 out of 10 33300000 1500.00 1852
321 raipur Santoshi Nagar Mitchell 3 4 out of 7 29100000 1250.00 2327
323 raipur Shankar Nagar achyutananda 3 5 out of 11 43400000 960.00 3600
326 raipur Kachna Road Sumit Wadhwani 3 2 out of 10 42900000 1500.00 2860
330 agra Sikandra vishal sharma 2 4 out of 6 25500000 900.00 2833
331 agra Vibhav Nagar Arnav Singh 3 2 out of 3 43500000 1540.00 2753
336 agra Shastri Puram hemant sharma 2 2 out of 5 44400000 848.00 3589
337 agra Agra Shamshabad Raja Kherah Marg Prem lal Taneja 3 1 out of 2 33500000 1350.00 2481
341 agra Fatehabad Road Sanjeev Singh 2 2 out of 2 40500000 1192.00 3400
346 agra Sikandra SHWETANG SHARMA 5 3 out of 5 81900000 2410.00 3997
352 agra Shahganj Sanchit gupta 2 2 out of 11 59900000 1750.00 3328
358 agra Shastri Puram Dr Archika Gupta Dr Archika 2 12 out of 12 41300000 861.00 3944
378 bhiwadi Alwar Bypass Road Rahul Rijhwani 2 4 out of 9 32500000 829.00 2790
405 mysore Siddhartha Layout Nataraj D M 2 Basement out of 1 65700000 1095.00 4000

Applying Transformation Techniques¶

In [640]:
# log transformation method
np.log(dff.price_in_crores)
Out[640]:
0     15.61
1     15.89
2     15.38
3     16.42
4     15.62
       ... 
415   15.20
416   15.75
417   15.61
418   15.61
419   15.79
Name: price_in_crores, Length: 420, dtype: float64
In [ ]:
 
In [641]:
sns.boxplot(np.log(dff.price_in_crores));
In [642]:
# cube root transformation method
dff.price_in_crores**(1/3)
Out[642]:
0     181.71
1     200.00
2     168.69
3     238.11
4     182.72
       ...  
415   158.74
416   190.38
417   181.71
418   181.71
419   193.10
Name: price_in_crores, Length: 420, dtype: float64
In [644]:
sns.boxplot(dff.price_in_crores**(1/3));
In [645]:
# Box-Cox tranformation method
stats.boxcox(dff.price_in_crores)

len(stats.boxcox(dff.price_in_crores))

type(stats.boxcox(dff.price_in_crores))

x,y = stats.boxcox(dff.price_in_crores)
Out[645]:
(array([27.95079616, 28.79848153, 27.30470585, 30.38383809, 27.99904977,
        30.26757691, 27.42216391, 31.28002149, 17.53617617, 29.52749432,
        30.60479773, 33.73538618, 28.40293621, 28.9792627 , 29.40661236,
        27.95079616, 28.72320672, 27.80140474, 29.70078348, 27.80140474,
        26.40692943, 31.28002149, 32.63292756, 29.8113948 , 17.53617617,
        30.60479773, 28.79848153, 30.38383809, 31.76755768, 34.68958496,
        28.52685189, 25.12201448, 31.98743902, 32.57353486, 26.85423153,
        35.22289001, 30.02199489, 30.26757691, 28.18497291, 33.64823654,
        32.57353486, 29.52749432, 29.31289647, 27.95079616, 24.09250186,
        26.15573972, 29.40661236, 34.5344799 , 34.24778452, 28.40293621,
        29.61529192, 25.97578756, 30.14707386, 31.45043446, 30.81199497,
        30.21990256, 19.10014485, 32.35088585, 17.53617617, 30.91096563,
        27.6976861 , 28.79848153, 27.95079616, 27.95079616, 28.9792627 ,
        27.36399308, 27.6976861 , 27.59039329, 28.36052991, 30.14707386,
        29.04881391, 26.15573972, 28.44478198, 28.9792627 , 27.24425686,
        27.18259786, 26.98983043, 29.89196429, 25.6837186 , 32.88285199,
        27.90178761, 27.11967755, 26.40692943, 28.56710454, 26.71232601,
        27.18259786, 30.26757691, 26.78410492, 28.40293621, 35.99229238,
        34.43670673, 28.48608236, 29.37567425, 28.09338592, 26.40692943,
        35.42808859, 27.24425686, 17.53617617, 26.63881196, 25.97578756,
        36.91547391, 25.97578756, 25.97578756, 27.6976861 , 27.95079616,
        28.52685189, 27.11967755, 29.11689157, 28.40293621, 27.36399308,
        34.14946907, 27.74997671, 29.37567425, 27.24425686, 28.72320672,
        33.76965688, 25.57961147, 26.92278282, 26.63881196, 28.72320672,
        28.04657215, 36.06113177, 28.68489264, 33.07482362, 35.68838963,
        28.60685371, 26.78410492, 27.85199919, 27.95079616, 27.74997671,
        27.47926106, 28.68489264, 26.06711555, 26.24182098, 29.89196429,
        28.31754728, 28.87202356, 26.15573972, 27.85199919, 27.11967755,
        27.11967755, 27.11967755, 30.14707386, 26.78410492, 27.30470585,
        29.61529192, 27.95079616, 29.18355899, 27.30470585, 30.14707386,
        26.85423153, 28.68489264, 32.12656242, 27.85199919, 30.56176608,
        31.45043446, 26.63881196, 26.15573972, 34.64307786, 27.24425686,
        25.78429452, 27.6976861 , 26.78410492, 26.40692943, 27.30470585,
        27.85199919, 31.00709651, 25.47170593, 24.86327953, 24.26409673,
        28.27397184, 25.47170593, 25.78429452, 26.56347349, 26.78410492,
        28.22978638, 23.9099021 , 26.32550611, 28.18497291, 25.12201448,
        27.95079616, 28.79848153, 25.12201448, 26.06711555, 25.47170593,
        25.24326576, 30.81199497, 27.11967755, 28.761066  , 27.11967755,
        25.97578756, 28.60685371, 27.11967755, 26.40692943, 28.18497291,
        27.24425686, 31.53253095, 25.47170593, 26.06711555, 26.40692943,
        25.47170593, 29.15039777, 27.42216391, 28.87202356, 24.72473424,
        27.95079616, 22.46030498, 25.6837186 , 26.92278282, 30.02199489,
        34.43670673, 27.18259786, 26.56347349, 25.35970284, 27.30470585,
        35.52392467, 17.53617617, 25.6837186 , 24.09250186, 26.40692943,
        28.18497291, 31.76755768, 33.23707247, 29.40661236, 34.14179368,
        34.50681354, 28.79848153, 35.29629949, 27.74997671, 34.64307786,
        28.09338592, 32.38900235, 35.7034398 , 25.24326576, 27.6976861 ,
        27.0554412 , 27.47926106, 28.18497291, 26.24182098, 27.42216391,
        28.79848153, 31.28002149, 30.71000484, 27.11967755, 24.42597887,
        29.24887544, 26.40692943, 36.26895868, 34.32892521, 32.45161133,
        30.02199489, 27.64450237, 30.49615582, 31.36630055, 29.43725783,
        28.9792627 , 26.92278282, 17.53617617, 33.42051256, 29.24887544,
        30.02199489, 30.31457295, 30.91096563, 32.12656242, 25.78429452,
        37.31086014, 27.18259786, 30.71000484, 32.02977097, 21.28510544,
        25.12201448, 28.761066  , 35.73335791, 27.85199919, 34.91979099,
        28.18497291, 19.10014485, 29.49769337, 28.18497291, 26.40692943,
        24.72473424, 26.78410492, 31.28002149, 28.18497291, 28.60685371,
        34.50681354, 29.15039777, 28.761066  , 28.18497291, 27.42216391,
        27.95079616, 26.40692943, 26.15573972, 28.60685371, 29.49769337,
        26.78410492, 30.02199489, 27.11967755, 35.68335928, 27.30470585,
        25.47170593, 28.68489264, 28.31754728, 27.53532484, 32.81644729,
        33.10582129, 33.17710299, 25.97578756, 26.92278282, 28.60685371,
        25.24326576, 33.07482362, 23.9099021 , 26.40692943, 25.47170593,
        26.24182098, 25.97578756, 33.25683744, 30.38383809, 27.36399308,
        25.97578756, 32.81644729, 26.24182098, 34.13410184, 25.47170593,
        26.48621412, 34.09539327, 24.86327953, 30.14707386, 25.47170593,
        32.38900235, 34.14179368, 28.31754728, 26.40692943, 30.38383809,
        28.18497291, 34.21028857, 33.27649216, 26.92278282, 27.30470585,
        27.24425686, 33.90353605, 25.78429452, 26.40692943, 31.19148065,
        27.95079616, 36.30321153, 27.6976861 , 26.78410492, 26.92278282,
        27.0554412 , 28.18497291, 35.22289001, 27.47926106, 25.35970284,
        27.0554412 , 28.18497291, 25.35970284, 33.96863845, 29.15039777,
        26.63881196, 23.5050292 , 25.24326576, 23.71472489, 26.48621412,
        24.57922331, 25.24326576, 30.38383809, 23.27837951, 31.06348072,
        26.56347349, 25.78429452, 24.09250186, 31.41703123, 25.78429452,
        31.26249816, 24.72473424, 25.6837186 , 33.17710299, 30.60479773,
        24.99551698, 25.97578756, 25.12201448, 24.86327953, 24.86327953,
        27.90178761, 27.42216391, 23.03166067, 24.57922331, 25.97578756,
        28.79848153, 27.95079616, 27.11967755, 28.31754728, 29.31289647,
        27.85199919, 28.87202356, 27.85199919, 27.42216391, 28.9792627 ,
        27.85199919, 29.49769337, 27.95079616, 27.74997671, 27.30470585,
        35.53965802, 27.95079616, 28.60685371, 28.60685371, 28.40293621,
        27.53532484, 30.60479773, 28.9792627 , 30.96897411, 28.79848153,
        26.78410492, 28.36052991, 27.95079616, 27.95079616, 28.48608236]),
 0.06860705540578052)
Out[645]:
2
Out[645]:
tuple
In [646]:
sns.boxplot(x);
In [647]:
# Standardization

ss2 = StandardScaler()

dff4 = ss2.fit_transform(dff.price_in_crores.values.reshape(-1,1))

round(dff4.mean())
dff4.std()
Out[647]:
0
Out[647]:
1.0
In [648]:
sns.boxplot(dff4);
In [649]:
# Normalization
mm2 = MinMaxScaler()

dff5 = mm2.fit_transform(dff.price_in_crores.values.reshape(-1,1))

dff5.min()
dff5.max()


sns.boxplot(dff5);
In [650]:
dff.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 420 entries, 0 to 419
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   City             420 non-null    object 
 1   Area             420 non-null    object 
 2   Owner            420 non-null    object 
 3   BHK              420 non-null    object 
 4   Floor            420 non-null    object 
 5   price_in_crores  420 non-null    int32  
 6   Carpet_Area      420 non-null    float64
 7   per_sqt_rupees   420 non-null    int32  
dtypes: float64(1), int32(2), object(5)
memory usage: 23.1+ KB
In [651]:
dff.isna().sum()
Out[651]:
City               0
Area               0
Owner              0
BHK                0
Floor              0
price_in_crores    0
Carpet_Area        0
per_sqt_rupees     0
dtype: int64
In [652]:
dff.to_csv('MagicBricks_cleaned.csv')
In [ ]:
 

Data Visiualization

In [2]:
# import libraries
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style('whitegrid')
plt.style.use('bmh')

import warnings
warnings.filterwarnings('ignore')

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

# for HD visualizations
%config InlineBackend.figure_format='retina'
In [3]:
data = pd.read_csv(r"C:\Users\GUDLA RAGUWING\Data Science Course\MagicBrics_Webscraping_project\MagicBricks_cleaned.csv")
In [4]:
data
Out[4]:
Unnamed: 0 City Area Owner BHK Floor price_in_crores Carpet_Area per_sqt_rupees
0 0 bangalore Kattigenahalli Omkar Pandey 3 Ground out of 4 6000000 1000.0 4950
1 1 bangalore Ayodaya Nagar gokul 2 Ground out of 4 8000000 1150.0 6957
2 2 bangalore Electronic City Thayumanavan 2 4 out of 4 4800000 1060.0 4528
3 3 bangalore Tumkur Road Prasanna 3 14 out of 15 13500000 1790.0 7542
4 4 bangalore Kudlu Gate Akash Akash 2 2 out of 4 6100000 803.0 5706
... ... ... ... ... ... ... ... ... ...
415 415 mysore Alwar Bypass Road Nayana A.S 2 Ground out of 3 4000000 1070.0 6389
416 416 mysore Kuvempunagar Srikanth 2 1 out of 3 6900000 1080.0 5000
417 417 mysore Alwar Bypass Road MANJUNATHA 2 Ground out of 2 6000000 1200.0 4274
418 418 mysore V V Mohalla Chakko 2 Ground out of 5 6000000 900.0 6667
419 419 mysore Sriramapura Prasanna 2 1 out of 3 7200000 1115.0 6457

420 rows × 9 columns

In [5]:
# Droping unnecessery columns
data.drop(['Unnamed: 0'], axis=1, inplace=True)
In [6]:
# mathematical computation
data.price_in_crores=data.price_in_crores.values/10000000
In [7]:
data.head(15)
Out[7]:
City Area Owner BHK Floor price_in_crores Carpet_Area per_sqt_rupees
0 bangalore Kattigenahalli Omkar Pandey 3 Ground out of 4 0.60 1000.0 4950
1 bangalore Ayodaya Nagar gokul 2 Ground out of 4 0.80 1150.0 6957
2 bangalore Electronic City Thayumanavan 2 4 out of 4 0.48 1060.0 4528
3 bangalore Tumkur Road Prasanna 3 14 out of 15 1.35 1790.0 7542
4 bangalore Kudlu Gate Akash Akash 2 2 out of 4 0.61 803.0 5706
5 bangalore Hal Stage 2 av nath 3 1 out of 4 1.30 1640.0 7303
6 bangalore Murugeshpalya Sangeeta Pillai 2 3 out of 5 0.50 1250.0 5000
7 bangalore Begur Road ganga k 3 Basement out of 17 1.80 1588.0 11335
8 bangalore Judicial Layout Anjan K 3 3 out of 14 0.01 1000.0 8889
9 bangalore Bellandur Omesh Saraf 2 2 out of 10 1.02 1465.0 6997
10 bangalore Kasturi Nagar Mohd Hussain 3 Ground out of 7 1.45 1500.0 8146
11 bangalore Koramangala Block 1 Rahul Jain 3 16 out of 16 3.85 1418.0 20632
12 bangalore Sarjapur Road Gaurav Kumar GAURAV 3 Ground out of 5 0.70 1350.0 5655
13 bangalore Devanahalli Ancy 2 1 out of 7 0.85 1000.0 6071
14 bangalore Hanumantha Nagar Anoop 2 2 out of 4 0.98 1250.0 9800
In [8]:
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 420 entries, 0 to 419
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   City             420 non-null    object 
 1   Area             420 non-null    object 
 2   Owner            420 non-null    object 
 3   BHK              420 non-null    int64  
 4   Floor            420 non-null    object 
 5   price_in_crores  420 non-null    float64
 6   Carpet_Area      420 non-null    float64
 7   per_sqt_rupees   420 non-null    int64  
dtypes: float64(2), int64(2), object(4)
memory usage: 26.4+ KB
In [9]:
data.describe()
Out[9]:
BHK price_in_crores Carpet_Area per_sqt_rupees
count 420.000000 420.000000 420.000000 420.000000
mean 2.378571 1.221786 1156.371429 5144.238095
std 0.754947 1.634996 530.553297 3553.470033
min 1.000000 0.010000 9.000000 1500.000000
25% 2.000000 0.377500 812.000000 3295.000000
50% 2.000000 0.600000 1052.500000 4274.000000
75% 3.000000 1.200000 1400.000000 5481.250000
max 5.000000 10.900000 3900.000000 27136.000000
In [10]:
%config InlineBackend.figure_format='retina'

# magic command to generate HD charts

Univariate Analysis¶

Catagorical¶

Pie Chart¶

In [37]:
plt.figure(figsize=(7,7), dpi = 300)
explode = (0.1,0.1,0.1,0.1,0.1)
plt.pie(x = data.Area.value_counts().values[0:5],
        labels = data.Area.value_counts().index[0:5], autopct = '%.2f%%', radius = 0.7, pctdistance=0.5,explode =explode)
plt.title('Percentage of Properties available in top Five Areas')
plt.legend(loc = 'upper right')
plt.show();

Numerical¶

Histogram¶

In [36]:
plt.figure(figsize=(15,5), dpi=300)
plt.title('Number_of properties for the range of Carpet_Area', fontsize=20, color = 'Black')
plt.xlabel('Carpet_Area',fontsize = 20, color = 'black')
plt.ylabel('count',fontsize = 20, color = 'black')
sns.histplot( x ='Carpet_Area', data = data, color = 'orange');

Box Plot¶

In [35]:
plt.figure(figsize=(12,5), dpi = 300)
plt.boxplot(data.BHK,patch_artist=True, medianprops=dict(color = 'black'))
plt.title('Distribution of Properties over BHKs', color = 'red',fontsize=20)
plt.xlabel('BHK',fontsize = 20, color = 'black')
plt.ylabel('count',fontsize = 20, color = 'black')
plt.show();

Bivariate Analysis¶

1.Categorical Vs Catagorical¶

In [14]:
data.BHK = data.BHK.astype(str)
In [15]:
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 420 entries, 0 to 419
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   City             420 non-null    object 
 1   Area             420 non-null    object 
 2   Owner            420 non-null    object 
 3   BHK              420 non-null    object 
 4   Floor            420 non-null    object 
 5   price_in_crores  420 non-null    float64
 6   Carpet_Area      420 non-null    float64
 7   per_sqt_rupees   420 non-null    int64  
dtypes: float64(2), int64(1), object(5)
memory usage: 26.4+ KB

Count Plot¶

In [16]:
sns.__version__
Out[16]:
'0.12.1'
In [17]:
plt.figure(figsize=(12,5), dpi = 300)
plt.title(' Count of BHKs', color = 'Green',fontsize=20)
sns.countplot(x=data.BHK, width = 0.5);
In [ ]:
 

Categorical & Numerical¶

Bar Chart¶

  • price_in_crores Vs City Analysis
In [18]:
plt.figure(figsize=(12,5), dpi = 300)
plt.title(' Price_in_Crores Vs City', color = 'blue',fontsize=20)
sns.barplot(x=data.price_in_crores, y= data.City, ci= False, estimator='median' );

Bar-Chart¶

  • Area Vs Price_in_Crores
In [19]:
plt.figure(figsize=(12,5), dpi = 300)
plt.title('Top 12 Areas Vs Price_in_Crores', color = 'blue',fontsize=20)
plt.xlabel('Areas',fontsize = 15, color = 'black')
plt.ylabel('Price_in_Crores',fontsize = 15, color = 'black')
plt.xticks(rotation = 90)
sns.barplot(x=data.Area.value_counts().index[0:12], y= data.price_in_crores.value_counts().index[0:12], ci= False, width = 0.5);

Numerical Vs Numerical¶

Scatter-plot¶

  • Carpet_Area vs per_sqt_rupees
In [20]:
plt.figure(figsize=(12,5), dpi = 300)
plt.scatter(data.Carpet_Area, data.per_sqt_rupees, label = 'per_sqt_rupees')

plt.title('Scatter plot - Carpet_Area vs per_sqt_rupees', fontsize=20)

plt.xlabel('Carpet_Area',fontsize = 15, color = 'black')
plt.ylabel('per_sqt_rupees',fontsize = 15, color = 'black')
plt.legend()
plt.show();

Scatter plot¶

  • Carpet_Area vs price_in_crores
In [21]:
plt.figure(figsize=(12,5), dpi = 300)
plt.scatter(data.Carpet_Area, data.price_in_crores, label = 'price')

plt.title('Scatter plot - Carpet_Area vs price_in_crores', fontsize=20)
plt.xlabel('Carpet_Area',fontsize = 15, color = 'black')
plt.ylabel('price_in_crores',fontsize = 15, color = 'black')
plt.legend()
plt.show();

Heat-Map Using Crosstab¶

In [22]:
# plot corsstab data

pd.crosstab(index=data.City, columns=data.BHK)
Out[22]:
BHK 1 2 3 4 5
City
agra 0 16 12 0 2
bangalore 0 12 17 1 0
bhiwadi 5 22 3 0 0
chennai 4 22 4 0 0
greater-noida 1 15 10 4 0
gurgaon 1 7 16 6 0
haridwar 8 13 8 1 0
mangalore 1 18 10 1 0
mumbai 11 14 4 1 0
mysore 1 17 9 3 0
raipur 2 15 13 0 0
ranchi 1 9 18 1 1
vadodara 4 14 9 3 0
visakhapatnam 1 14 15 0 0
In [23]:
i=pd.crosstab(index=data.City, columns=data.BHK)
In [24]:
plt.figure(figsize=(15,7), dpi=300)
plt.title('HeatMap - BHK vs City', fontsize=20)
sns.heatmap(i, annot=True, fmt = '.2f', linewidths=0.5, cmap='cool', linecolor='black');
plt.xlabel('BHK',fontsize = 15, color = 'black')
plt.ylabel('City',fontsize = 15, color = 'black');
# fmt=d for integers; fmt = .2f for float with 2 decimals

Heat Map using Corellation¶

In [25]:
data.BHK=data.BHK.astype(int)
In [26]:
data_cor = data.corr()
In [27]:
plt.figure(figsize=(16,10), dpi=300)
plt.title('Correlation between Numerical columns', fontsize=20)
sns.heatmap(data_cor, annot=True, fmt = '.2f', linewidths=0.5, cmap='cool', linecolor='black');

# fmt=d for integers; fmt = .2f for float with 2 decimals

Multi-Variate¶

Pair-plot¶

In [28]:
%%time
# the above command is a magic command to display wall time in seconds
plt.figure(figsize=(16,8), dpi= 300)
sns.pairplot(data)
plt.show();
<Figure size 4800x2400 with 0 Axes>
CPU times: total: 4.8 s
Wall time: 5.69 s

Multi-Variate analysis Using hue parameter¶

Relational Plot's¶

In [29]:
plt.figure(figsize=(15,5), dpi = 300)
sns.relplot(data = data, x = 'price_in_crores', y= 'City',hue = 'BHK')
plt.show();
<Figure size 4500x1500 with 0 Axes>

catplot ----> Kind = 'swarm'¶

In [30]:
plt.figure(figsize=(15,5), dpi = 300)
sns.catplot(x="price_in_crores", y="City", kind="swarm",hue = 'BHK',color = 'red', data=data);
<Figure size 4500x1500 with 0 Axes>

catplot ----> Kind = 'violin '¶

In [31]:
plt.figure(figsize=(15,8), dpi = 300)
sns.catplot(x="price_in_crores",y="City",kind='violin',data=data);
<Figure size 4500x2400 with 0 Axes>

Creating User defined function to view the data frame accordingly¶

In [32]:
data.tail()
Out[32]:
City Area Owner BHK Floor price_in_crores Carpet_Area per_sqt_rupees
415 mysore Alwar Bypass Road Nayana A.S 2 Ground out of 3 0.40 1070.0 6389
416 mysore Kuvempunagar Srikanth 2 1 out of 3 0.69 1080.0 5000
417 mysore Alwar Bypass Road MANJUNATHA 2 Ground out of 2 0.60 1200.0 4274
418 mysore V V Mohalla Chakko 2 Ground out of 5 0.60 900.0 6667
419 mysore Sriramapura Prasanna 2 1 out of 3 0.72 1115.0 6457
In [33]:
def user_data_frame():
    Data = input('enter price_in_croers or BHK or City')
    if Data=='City':
        x = input('enter the City name')
        if(x=='bangalore'):
            return(data[data.City=='bangalore'])
        elif(x=='mysore'):
            return(data[data.City=='mysore'])
    elif Data=='BHK':
        x = int(input('enter the BHK Number'))
        if(x==2):
            return(data[data.BHK==2])
        elif(x==3):
            return(data[data.BHK==3])
    elif Data =='price_in_crores':
        x = float(input('enter the price'))
        if(x>0.9):
            return(data[data.price_in_crores>0.9])
        
In [34]:
user_data_frame()
enter price_in_croers or BHK or CityBHK
enter the BHK Number2
Out[34]:
City Area Owner BHK Floor price_in_crores Carpet_Area per_sqt_rupees
1 bangalore Ayodaya Nagar gokul 2 Ground out of 4 0.80 1150.0 6957
2 bangalore Electronic City Thayumanavan 2 4 out of 4 0.48 1060.0 4528
4 bangalore Kudlu Gate Akash Akash 2 2 out of 4 0.61 803.0 5706
6 bangalore Murugeshpalya Sangeeta Pillai 2 3 out of 5 0.50 1250.0 5000
9 bangalore Bellandur Omesh Saraf 2 2 out of 10 1.02 1465.0 6997
... ... ... ... ... ... ... ... ...
415 mysore Alwar Bypass Road Nayana A.S 2 Ground out of 3 0.40 1070.0 6389
416 mysore Kuvempunagar Srikanth 2 1 out of 3 0.69 1080.0 5000
417 mysore Alwar Bypass Road MANJUNATHA 2 Ground out of 2 0.60 1200.0 4274
418 mysore V V Mohalla Chakko 2 Ground out of 5 0.60 900.0 6667
419 mysore Sriramapura Prasanna 2 1 out of 3 0.72 1115.0 6457

208 rows × 8 columns

In [ ]: